Skip to content

Commit a7df4a8

Browse files
committed
Implement native ESRI reader
1 parent 8c62da3 commit a7df4a8

File tree

17 files changed

+6723
-4
lines changed

17 files changed

+6723
-4
lines changed

Diff for: lib/trino-hive-formats/pom.xml

+5
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717
</properties>
1818

1919
<dependencies>
20+
<dependency>
21+
<groupId>com.esri.geometry</groupId>
22+
<artifactId>esri-geometry-api</artifactId>
23+
</dependency>
24+
2025
<dependency>
2126
<groupId>com.fasterxml.jackson.core</groupId>
2227
<artifactId>jackson-core</artifactId>

Diff for: lib/trino-hive-formats/src/main/java/io/trino/hive/formats/HiveClassNames.java

+2
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ public final class HiveClassNames
4545
public static final String SEQUENCEFILE_INPUT_FORMAT_CLASS = "org.apache.hadoop.mapred.SequenceFileInputFormat";
4646
public static final String SYMLINK_TEXT_INPUT_FORMAT_CLASS = "org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat";
4747
public static final String TEXT_INPUT_FORMAT_CLASS = "org.apache.hadoop.mapred.TextInputFormat";
48+
public static final String ESRI_SERDE_CLASS = "com.esri.hadoop.hive.serde.EsriJsonSerDe";
49+
public static final String ESRI_INPUT_FORMAT_CLASS = "com.esri.json.hadoop.EnclosedEsriJsonInputFormat";
4850

4951
private HiveClassNames() {}
5052
}

Diff for: lib/trino-hive-formats/src/main/java/io/trino/hive/formats/esri/EsriDeserializer.java

+383
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.trino.hive.formats.esri;
15+
16+
import com.fasterxml.jackson.core.JsonFactory;
17+
import com.fasterxml.jackson.core.JsonParser;
18+
import com.fasterxml.jackson.core.JsonToken;
19+
import com.google.common.io.Closer;
20+
import com.google.common.io.CountingInputStream;
21+
import io.trino.spi.PageBuilder;
22+
23+
import java.io.Closeable;
24+
import java.io.IOException;
25+
import java.io.InputStream;
26+
27+
import static com.fasterxml.jackson.core.JsonFactory.Feature.INTERN_FIELD_NAMES;
28+
import static io.trino.plugin.base.util.JsonUtils.jsonFactoryBuilder;
29+
import static java.util.Objects.requireNonNull;
30+
31+
public class EsriReader
32+
implements Closeable
33+
{
34+
private static final JsonFactory JSON_FACTORY = jsonFactoryBuilder()
35+
.disable(INTERN_FIELD_NAMES)
36+
.build();
37+
38+
private final CountingInputStream inputStream;
39+
private final EsriDeserializer esriDeserializer;
40+
private JsonParser parser;
41+
private long readTimeNanos;
42+
private boolean closed;
43+
44+
private static final String FEATURES_ARRAY_NAME = "features";
45+
46+
public EsriReader(InputStream inputStream, EsriDeserializer esriDeserializer)
47+
throws IOException
48+
{
49+
requireNonNull(inputStream, "inputStream is null");
50+
this.inputStream = new CountingInputStream(inputStream);
51+
this.esriDeserializer = requireNonNull(esriDeserializer, "esriDeserializer is null");
52+
53+
this.initializeParser();
54+
}
55+
56+
private void initializeParser()
57+
throws IOException
58+
{
59+
parser = JSON_FACTORY.createParser(inputStream);
60+
61+
// Find features array
62+
while (true) {
63+
JsonToken token = parser.nextToken();
64+
if (token == null) {
65+
return;
66+
}
67+
if (token == JsonToken.START_ARRAY &&
68+
FEATURES_ARRAY_NAME.equals(parser.currentName())) {
69+
break;
70+
}
71+
}
72+
}
73+
74+
@Override
75+
public void close()
76+
throws IOException
77+
{
78+
closed = true;
79+
try (Closer closer = Closer.create()) {
80+
closer.register(inputStream);
81+
closer.register(parser);
82+
}
83+
}
84+
85+
public boolean next(PageBuilder pageBuilder)
86+
throws IOException
87+
{
88+
long start = System.nanoTime();
89+
90+
try {
91+
JsonToken token = parser.nextToken();
92+
if (token == JsonToken.START_OBJECT && parser.currentName() == null) {
93+
esriDeserializer.deserialize(pageBuilder, parser);
94+
return true;
95+
}
96+
else {
97+
return false;
98+
}
99+
}
100+
finally {
101+
long duration = System.nanoTime() - start;
102+
readTimeNanos += duration;
103+
}
104+
}
105+
106+
public long getBytesRead()
107+
{
108+
return inputStream.getCount();
109+
}
110+
111+
public long getReadTimeNanos()
112+
{
113+
return readTimeNanos;
114+
}
115+
116+
public boolean isClosed()
117+
{
118+
return closed;
119+
}
120+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.trino.hive.formats.esri;
15+
16+
/**
 * Geometry type tags, each paired with a fixed integer index
 * ({@code UNKNOWN} = 0 through {@code ST_MULTIPOLYGON} = 6).
 */
public enum OGCType
{
    UNKNOWN(0),
    ST_POINT(1),
    ST_LINESTRING(2),
    ST_POLYGON(3),
    ST_MULTIPOINT(4),
    ST_MULTILINESTRING(5),
    ST_MULTIPOLYGON(6);

    // Explicit value rather than ordinal(), so reordering constants cannot change it
    private final int typeIndex;

    OGCType(int typeIndex)
    {
        this.typeIndex = typeIndex;
    }

    /**
     * @return the integer index assigned to this type
     */
    public int getIndex()
    {
        return typeIndex;
    }
}

0 commit comments

Comments
 (0)