diff --git a/flink-connector-elasticsearch8/pom.xml b/flink-connector-elasticsearch8/pom.xml index 76bc7e0c..cbef9d1b 100644 --- a/flink-connector-elasticsearch8/pom.xml +++ b/flink-connector-elasticsearch8/pom.xml @@ -138,6 +138,27 @@ under the License. test-jar test + + + org.apache.flink + flink-table-runtime + ${flink.version} + provided + + + + org.apache.flink + flink-table-planner-loader + ${flink.version} + test + + + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + ${jackson.version} + + diff --git a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/sink/Elasticsearch8AsyncSinkBuilder.java b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/sink/Elasticsearch8AsyncSinkBuilder.java index 31e81200..2e8dc9ab 100644 --- a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/sink/Elasticsearch8AsyncSinkBuilder.java +++ b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/sink/Elasticsearch8AsyncSinkBuilder.java @@ -22,6 +22,7 @@ package org.apache.flink.connector.elasticsearch.sink; import org.apache.flink.api.connector.sink2.SinkWriter; +import org.apache.flink.api.connector.sink2.WriterInitContext; import org.apache.flink.connector.base.sink.AsyncSinkBaseBuilder; import org.apache.flink.connector.base.sink.writer.ElementConverter; import org.apache.flink.util.function.SerializableSupplier; @@ -80,6 +81,15 @@ public class Elasticsearch8AsyncSinkBuilder */ private ElementConverter elementConverter; + /** the path's prefix for every request. */ + private String connectionPathPrefix; + + private Integer connectionTimeout; + + private Integer connectionRequestTimeout; + + private Integer socketTimeout; + private SerializableSupplier sslContextSupplier; private SerializableSupplier sslHostnameVerifier; @@ -97,6 +107,28 @@ public Elasticsearch8AsyncSinkBuilder setHosts(HttpHost... 
hosts) { return this; } + /** Sets the path prefix that {@code buildNetworkConfig()} forwards to {@code NetworkConfig}; it may be left unset. */ public Elasticsearch8AsyncSinkBuilder setConnectionPathPrefix( + String connectionPathPrefix) { + this.connectionPathPrefix = connectionPathPrefix; + return this; + } + + /** Sets the connection timeout that {@code buildNetworkConfig()} forwards to {@code NetworkConfig}. NOTE(review): the unit is presumably milliseconds, as expected by the HTTP client request config -- confirm. */ public Elasticsearch8AsyncSinkBuilder setConnectionTimeout(Integer connectionTimeout) { + this.connectionTimeout = connectionTimeout; + return this; + } + + /** Sets the connection request timeout that {@code buildNetworkConfig()} forwards to {@code NetworkConfig}. NOTE(review): the unit is presumably milliseconds -- confirm. */ public Elasticsearch8AsyncSinkBuilder setConnectionRequestTimeout( + Integer connectionRequestTimeout) { + this.connectionRequestTimeout = connectionRequestTimeout; + return this; + } + + /** Sets the socket timeout that {@code buildNetworkConfig()} forwards to {@code NetworkConfig}. NOTE(review): the unit is presumably milliseconds -- confirm. */ public Elasticsearch8AsyncSinkBuilder setSocketTimeout(Integer socketTimeout) { + this.socketTimeout = socketTimeout; + return this; + } + /** * setHeaders set the headers to be sent with the requests made to Elasticsearch cluster.. * @@ -239,7 +271,16 @@ private OperationConverter buildOperationConverter( private NetworkConfig buildNetworkConfig() { checkArgument(!hosts.isEmpty(), "Hosts cannot be empty."); return new NetworkConfig( - hosts, username, password, headers, sslContextSupplier, sslHostnameVerifier); + hosts, + username, + password, + headers, + connectionPathPrefix, + connectionRequestTimeout, + connectionTimeout, + socketTimeout, + sslContextSupplier, + sslHostnameVerifier); } /** A wrapper that evolves the Operation, since a BulkOperationVariant is not Serializable. 
*/ @@ -250,6 +291,12 @@ public OperationConverter(ElementConverter converter) { this.converter = converter; } + @Override + public void open(WriterInitContext context) { + // call converter.open() before calling converter.apply() + converter.open(context); + } + @Override public Operation apply(T element, SinkWriter.Context context) { return new Operation(converter.apply(element, context)); diff --git a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/sink/NetworkConfig.java b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/sink/NetworkConfig.java index 9a1cd9c1..93ecd783 100644 --- a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/sink/NetworkConfig.java +++ b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/sink/NetworkConfig.java @@ -26,6 +26,12 @@ import co.elastic.clients.elasticsearch.ElasticsearchAsyncClient; import co.elastic.clients.json.jackson.JacksonJsonpMapper; import co.elastic.clients.transport.rest_client.RestClientTransport; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.json.JsonMapper; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; +import com.fasterxml.jackson.datatype.jsr310.ser.LocalDateSerializer; +import com.fasterxml.jackson.datatype.jsr310.ser.LocalDateTimeSerializer; +import com.fasterxml.jackson.datatype.jsr310.ser.LocalTimeSerializer; import org.apache.http.Header; import org.apache.http.HttpHost; import org.apache.http.auth.AuthScope; @@ -40,6 +46,10 @@ import javax.net.ssl.SSLContext; import java.io.Serializable; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.format.DateTimeFormatter; import java.util.List; import static org.apache.flink.util.Preconditions.checkState; @@ -47,36 +57,55 @@ /** A factory that creates valid ElasticsearchClient instances. 
*/ public class NetworkConfig implements Serializable { private final List hosts; - private final List
headers; - private final String username; - private final String password; - + @Nullable private final String connectionPathPrefix; + @Nullable Integer connectionRequestTimeout; + @Nullable Integer connectionTimeout; + @Nullable Integer socketTimeout; @Nullable private final SerializableSupplier sslContextSupplier; - @Nullable private final SerializableSupplier sslHostnameVerifier; + private static final DateTimeFormatter DATE_TIME_FORMATTER = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + private static final DateTimeFormatter DATE_FORMATTER = + DateTimeFormatter.ofPattern("yyyy-MM-dd"); + private static final DateTimeFormatter TIME_FORMATTER = DateTimeFormatter.ofPattern("HH:mm:ss"); public NetworkConfig( List hosts, String username, String password, List
headers, - SerializableSupplier sslContextSupplier, - SerializableSupplier sslHostnameVerifier) { + @Nullable String connectionPathPrefix, + @Nullable Integer connectionRequestTimeout, + @Nullable Integer connectionTimeout, + @Nullable Integer socketTimeout, + @Nullable SerializableSupplier sslContextSupplier, + @Nullable SerializableSupplier sslHostnameVerifier) { checkState(!hosts.isEmpty(), "Hosts must not be empty"); this.hosts = hosts; this.username = username; this.password = password; this.headers = headers; + this.connectionRequestTimeout = connectionRequestTimeout; + this.connectionTimeout = connectionTimeout; + this.socketTimeout = socketTimeout; + this.connectionPathPrefix = connectionPathPrefix; this.sslContextSupplier = sslContextSupplier; this.sslHostnameVerifier = sslHostnameVerifier; } public ElasticsearchAsyncClient createEsClient() { + // the JavaTimeModule is added to provide support for java 8 Time classes. + JavaTimeModule javaTimeModule = new JavaTimeModule(); + javaTimeModule.addSerializer( + LocalDateTime.class, new LocalDateTimeSerializer(DATE_TIME_FORMATTER)); + javaTimeModule.addSerializer(LocalDate.class, new LocalDateSerializer(DATE_FORMATTER)); + javaTimeModule.addSerializer(LocalTime.class, new LocalTimeSerializer(TIME_FORMATTER)); + ObjectMapper mapper = JsonMapper.builder().addModule(javaTimeModule).build(); return new ElasticsearchAsyncClient( - new RestClientTransport(this.getRestClient(), new JacksonJsonpMapper())); + new RestClientTransport(this.getRestClient(), new JacksonJsonpMapper(mapper))); } private RestClient getRestClient() { @@ -105,6 +134,29 @@ private RestClient getRestClient() { restClientBuilder.setDefaultHeaders(headers.toArray(new Header[0])); } + if (connectionPathPrefix != null) { + restClientBuilder.setPathPrefix(connectionPathPrefix); + } + + if (connectionRequestTimeout != null + || connectionTimeout != null + || socketTimeout != null) { + restClientBuilder.setRequestConfigCallback( + requestConfigBuilder -> 
{ + if (connectionRequestTimeout != null) { + requestConfigBuilder.setConnectionRequestTimeout( + connectionRequestTimeout); + } + if (connectionTimeout != null) { + requestConfigBuilder.setConnectTimeout(connectionTimeout); + } + if (socketTimeout != null) { + requestConfigBuilder.setSocketTimeout(socketTimeout); + } + return requestConfigBuilder; + }); + } + return restClientBuilder.build(); } diff --git a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/AbstractTimeIndexGenerator.java b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/AbstractTimeIndexGenerator.java new file mode 100644 index 00000000..4a562c6b --- /dev/null +++ b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/AbstractTimeIndexGenerator.java @@ -0,0 +1,23 @@ +package org.apache.flink.connector.elasticsearch.table; + +import org.apache.flink.annotation.Internal; + +import java.time.format.DateTimeFormatter; + +/** Abstract class for time related {@link IndexGenerator}. 
*/ +@Internal +abstract class AbstractTimeIndexGenerator extends IndexGeneratorBase { + + private final String dateTimeFormat; + protected transient DateTimeFormatter dateTimeFormatter; + + public AbstractTimeIndexGenerator(String index, String dateTimeFormat) { + super(index); + this.dateTimeFormat = dateTimeFormat; + } + + @Override + public void open() { + this.dateTimeFormatter = DateTimeFormatter.ofPattern(dateTimeFormat); + } +} diff --git a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/ElasticSearch8AsyncDynamicSink.java b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/ElasticSearch8AsyncDynamicSink.java new file mode 100644 index 00000000..52795190 --- /dev/null +++ b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/ElasticSearch8AsyncDynamicSink.java @@ -0,0 +1,214 @@ +package org.apache.flink.connector.elasticsearch.table; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.connector.base.table.sink.AsyncDynamicTableSink; +import org.apache.flink.connector.base.table.sink.AsyncDynamicTableSinkBuilder; +import org.apache.flink.connector.elasticsearch.sink.Elasticsearch8AsyncSinkBuilder; +import org.apache.flink.connector.elasticsearch.sink.Operation; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.connector.ChangelogMode; +import org.apache.flink.table.connector.format.EncodingFormat; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.connector.sink.SinkV2Provider; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.DataType; +import org.apache.flink.types.RowKind; +import org.apache.flink.util.StringUtils; + +import org.apache.http.HttpHost; + +import java.time.ZoneId; +import java.util.List; +import 
java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** ElasticSearch backed {@link AsyncDynamicTableSink}. */ +@Internal +public class ElasticSearch8AsyncDynamicSink extends AsyncDynamicTableSink { + final transient EncodingFormat> format; + final DataType physicalRowDataType; + final List primaryKeyLogicalTypesWithIndex; + final Elasticsearch8Configuration config; + final ZoneId localTimeZoneId; + + final String summaryString; + final boolean isDynamicIndexWithSystemTime; + + public ElasticSearch8AsyncDynamicSink( + EncodingFormat> format, + Elasticsearch8Configuration config, + List primaryKeyLogicalTypesWithIndex, + DataType physicalRowDataType, + String summaryString, + ZoneId localTimeZoneId) { + super( + config.getBulkFlushMaxActions(), + config.getBulkFlushMaxInFlightActions(), + config.getBulkFlushMaxBufferedActions(), + config.getBulkFlushMaxByteSize().getBytes(), + config.getBulkFlushInterval()); + this.format = checkNotNull(format); + this.physicalRowDataType = checkNotNull(physicalRowDataType); + this.primaryKeyLogicalTypesWithIndex = checkNotNull(primaryKeyLogicalTypesWithIndex); + this.config = checkNotNull(config); + this.summaryString = checkNotNull(summaryString); + this.localTimeZoneId = localTimeZoneId; + this.isDynamicIndexWithSystemTime = isDynamicIndexWithSystemTime(); + } + + public boolean isDynamicIndexWithSystemTime() { + IndexGeneratorFactory.IndexHelper indexHelper = new IndexGeneratorFactory.IndexHelper(); + return indexHelper.checkIsDynamicIndexWithSystemTimeFormat(config.getIndex()); + } + + Function createKeyExtractor() { + return KeyExtractor.createKeyExtractor( + primaryKeyLogicalTypesWithIndex, config.getKeyDelimiter()); + } + + IndexGenerator createIndexGenerator() { + return IndexGeneratorFactory.createIndexGenerator( + config.getIndex(), + DataType.getFieldNames(physicalRowDataType), + DataType.getFieldDataTypes(physicalRowDataType), + localTimeZoneId); + } + + @Override + public 
ChangelogMode getChangelogMode(ChangelogMode requestedMode) { + ChangelogMode.Builder builder = ChangelogMode.newBuilder(); + for (RowKind kind : requestedMode.getContainedKinds()) { + if (kind != RowKind.UPDATE_BEFORE) { + builder.addContainedKind(kind); + } + } + if (isDynamicIndexWithSystemTime && !requestedMode.containsOnly(RowKind.INSERT)) { + throw new ValidationException( + "Dynamic indexing based on system time only works on append only stream."); + } + return builder.build(); + } + + /** Configures an {@code Elasticsearch8AsyncSinkBuilder} from the table options (hosts, batching limits, element converter, optional credentials, path prefix, timeouts and certificate fingerprint) and wraps the built sink in a {@code SinkV2Provider} using the configured parallelism, if any. */ @Override + public SinkRuntimeProvider getSinkRuntimeProvider(Context context) { + Elasticsearch8AsyncSinkBuilder builder = new Elasticsearch8AsyncSinkBuilder<>(); + + builder.setHosts(config.getHosts().toArray(new HttpHost[0])); + builder.setMaxBatchSize(config.getBulkFlushMaxActions()); + builder.setMaxBufferedRequests(config.getBulkFlushMaxBufferedActions()); + builder.setMaxBatchSizeInBytes(config.getBulkFlushMaxByteSize().getBytes()); + builder.setMaxTimeInBufferMS(config.getBulkFlushInterval()); + builder.setElementConverter( + new RowDataElementConverter( + physicalRowDataType, createIndexGenerator(), createKeyExtractor())); + + if (config.getUsername().isPresent() + && !StringUtils.isNullOrWhitespaceOnly(config.getUsername().get())) { + builder.setUsername(config.getUsername().get()); + } + + if (config.getPassword().isPresent() + && !StringUtils.isNullOrWhitespaceOnly(config.getPassword().get())) { + builder.setPassword(config.getPassword().get()); + } + + if (config.getPathPrefix().isPresent() + && !StringUtils.isNullOrWhitespaceOnly(config.getPathPrefix().get())) { + builder.setConnectionPathPrefix(config.getPathPrefix().get()); + } + + /* The timeout options are Durations; they are converted to milliseconds because NetworkConfig passes the integers unchanged to the HTTP client's request config builder, whose timeouts are expressed in milliseconds. */ if (config.getConnectionRequestTimeout().isPresent()) { + builder.setConnectionRequestTimeout( + (int) config.getConnectionRequestTimeout().get().toMillis()); + } + + if (config.getConnectionTimeout().isPresent()) { + builder.setConnectionTimeout((int) config.getConnectionTimeout().get().toMillis()); + } + + if 
(config.getSocketTimeout().isPresent()) { + builder.setSocketTimeout((int) config.getSocketTimeout().get().toMillis()); + } + + if (config.getCertificateFingerprint().isPresent()) { + builder.setCertificateFingerprint(config.getCertificateFingerprint().get()); + } + + return SinkV2Provider.of(builder.build(), config.getParallelism().orElse(null)); + } + + @Override + public DynamicTableSink copy() { + return new ElasticSearch8AsyncDynamicSink( + format, + config, + primaryKeyLogicalTypesWithIndex, + physicalRowDataType, + summaryString, + localTimeZoneId); + } + + @Override + public String asSummaryString() { + return summaryString; + } + + /** Builder class for {@link ElasticSearch8AsyncDynamicSink}. */ + @Internal + public static class ElasticSearch8AsyncDynamicSinkBuilder + extends AsyncDynamicTableSinkBuilder { + + DataType physicalRowDataType; + List primaryKeyLogicalTypesWithIndex; + ZoneId localTimeZoneId; + String summaryString; + Elasticsearch8Configuration config; + EncodingFormat> format; + + public ElasticSearch8AsyncDynamicSinkBuilder setConfig(Elasticsearch8Configuration config) { + this.config = config; + return this; + } + + public ElasticSearch8AsyncDynamicSinkBuilder setFormat( + EncodingFormat> format) { + this.format = format; + return this; + } + + public ElasticSearch8AsyncDynamicSinkBuilder setPhysicalRowDataType( + DataType physicalRowDataType) { + this.physicalRowDataType = physicalRowDataType; + return this; + } + + public ElasticSearch8AsyncDynamicSinkBuilder setPrimaryKeyLogicalTypesWithIndex( + List primaryKeyLogicalTypesWithIndex) { + this.primaryKeyLogicalTypesWithIndex = primaryKeyLogicalTypesWithIndex; + return this; + } + + public ElasticSearch8AsyncDynamicSinkBuilder setLocalTimeZoneId(ZoneId localTimeZoneId) { + this.localTimeZoneId = localTimeZoneId; + return this; + } + + public ElasticSearch8AsyncDynamicSinkBuilder setSummaryString(String summaryString) { + this.summaryString = summaryString; + return this; + } + + @Override 
+ public ElasticSearch8AsyncDynamicSink build() { + return new ElasticSearch8AsyncDynamicSink( + format, + config, + primaryKeyLogicalTypesWithIndex, + physicalRowDataType, + summaryString, + localTimeZoneId); + } + } +} diff --git a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/ElasticSearch8AsyncDynamicTableFactory.java b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/ElasticSearch8AsyncDynamicTableFactory.java new file mode 100644 index 00000000..3362ae24 --- /dev/null +++ b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/ElasticSearch8AsyncDynamicTableFactory.java @@ -0,0 +1,227 @@ +package org.apache.flink.connector.elasticsearch.table; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.connector.base.table.AsyncDynamicTableSinkFactory; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.api.config.TableConfigOptions; +import org.apache.flink.table.catalog.Column; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.connector.Projection; +import org.apache.flink.table.connector.format.EncodingFormat; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.factories.FactoryUtil; +import org.apache.flink.table.factories.SerializationFormatFactory; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.util.StringUtils; + +import java.time.ZoneId; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.function.Supplier; +import 
java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.apache.commons.lang3.StringUtils.capitalize; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.BULK_FLUSH_INTERVAL_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.BULK_FLUSH_MAX_ACTIONS_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.BULK_FLUSH_MAX_BUFFERED_ACTIONS_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.BULK_FLUSH_MAX_IN_FLIGHT_ACTIONS_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.BULK_FLUSH_MAX_SIZE_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.CONNECTION_PATH_PREFIX_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.CONNECTION_REQUEST_TIMEOUT; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.CONNECTION_TIMEOUT; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.DELIVERY_GUARANTEE_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.FORMAT_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.HOSTS_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.INDEX_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.KEY_DELIMITER_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.PASSWORD_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.SOCKET_TIMEOUT; +import static 
org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.SSL_CERTIFICATE_FINGERPRINT; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.USERNAME_OPTION; +import static org.apache.flink.table.connector.source.lookup.LookupOptions.CACHE_TYPE; +import static org.apache.flink.table.connector.source.lookup.LookupOptions.MAX_RETRIES; +import static org.apache.flink.table.connector.source.lookup.LookupOptions.PARTIAL_CACHE_CACHE_MISSING_KEY; +import static org.apache.flink.table.connector.source.lookup.LookupOptions.PARTIAL_CACHE_EXPIRE_AFTER_ACCESS; +import static org.apache.flink.table.connector.source.lookup.LookupOptions.PARTIAL_CACHE_EXPIRE_AFTER_WRITE; +import static org.apache.flink.table.connector.source.lookup.LookupOptions.PARTIAL_CACHE_MAX_ROWS; +import static org.apache.flink.table.factories.FactoryUtil.SINK_PARALLELISM; + +/** Factory for creating {@link ElasticSearch8AsyncDynamicSink} . */ +@Internal +public class ElasticSearch8AsyncDynamicTableFactory extends AsyncDynamicTableSinkFactory { + + private static final String IDENTIFIER = "elasticsearch-8"; + + @Override + public DynamicTableSink createDynamicTableSink(Context context) { + List primaryKeyLogicalTypesWithIndex = + getPrimaryKeyLogicalTypesWithIndex(context); + + final FactoryUtil.TableFactoryHelper helper = + FactoryUtil.createTableFactoryHelper(this, context); + EncodingFormat> format = + helper.discoverEncodingFormat(SerializationFormatFactory.class, FORMAT_OPTION); + + Elasticsearch8Configuration config = getConfiguration(helper); + helper.validate(); + validateConfiguration(config); + + ElasticSearch8AsyncDynamicSink.ElasticSearch8AsyncDynamicSinkBuilder builder = + new ElasticSearch8AsyncDynamicSink.ElasticSearch8AsyncDynamicSinkBuilder(); + + return builder.setConfig(config) + .setFormat(format) + .setPrimaryKeyLogicalTypesWithIndex(primaryKeyLogicalTypesWithIndex) + .setPhysicalRowDataType(context.getPhysicalRowDataType()) + 
.setLocalTimeZoneId(getLocalTimeZoneId(context.getConfiguration())) + .setSummaryString(capitalize(IDENTIFIER)) + .build(); + } + + ZoneId getLocalTimeZoneId(ReadableConfig readableConfig) { + final String zone = readableConfig.get(TableConfigOptions.LOCAL_TIME_ZONE); + + return TableConfigOptions.LOCAL_TIME_ZONE.defaultValue().equals(zone) + ? ZoneId.systemDefault() + : ZoneId.of(zone); + } + + List getPrimaryKeyLogicalTypesWithIndex(Context context) { + DataType physicalRowDataType = context.getPhysicalRowDataType(); + int[] primaryKeyIndexes = context.getPrimaryKeyIndexes(); + if (primaryKeyIndexes.length != 0) { + DataType pkDataType = Projection.of(primaryKeyIndexes).project(physicalRowDataType); + + ElasticsearchValidationUtils.validatePrimaryKey(pkDataType); + } + + ResolvedSchema resolvedSchema = context.getCatalogTable().getResolvedSchema(); + return Arrays.stream(primaryKeyIndexes) + .mapToObj( + index -> { + Optional column = resolvedSchema.getColumn(index); + if (!column.isPresent()) { + throw new IllegalStateException( + String.format( + "No primary key column found with index '%s'.", + index)); + } + LogicalType logicalType = column.get().getDataType().getLogicalType(); + return new LogicalTypeWithIndex(index, logicalType); + }) + .collect(Collectors.toList()); + } + + Elasticsearch8Configuration getConfiguration(FactoryUtil.TableFactoryHelper helper) { + return new Elasticsearch8Configuration(helper.getOptions()); + } + + void validateConfiguration(Elasticsearch8Configuration config) { + config.getHosts(); // validate hosts + validate( + config.getIndex().length() >= 1, + () -> String.format("'%s' must not be empty", INDEX_OPTION.key())); + int maxActions = config.getBulkFlushMaxActions(); + validate( + maxActions == -1 || maxActions >= 1, + () -> + String.format( + "'%s' must be at least 1. 
Got: %s", + BULK_FLUSH_MAX_ACTIONS_OPTION.key(), maxActions)); + long maxSize = config.getBulkFlushMaxByteSize().getBytes(); + long mb1 = 1024 * 1024; + validate( + maxSize == -1 || (maxSize >= mb1 && maxSize % mb1 == 0), + () -> + String.format( + "'%s' must be in MB granularity. Got: %s", + BULK_FLUSH_MAX_SIZE_OPTION.key(), + config.getBulkFlushMaxByteSize().toHumanReadableString())); + if (config.getUsername().isPresent() + && !StringUtils.isNullOrWhitespaceOnly(config.getUsername().get())) { + validate( + config.getPassword().isPresent() + && !StringUtils.isNullOrWhitespaceOnly(config.getPassword().get()), + () -> + String.format( + "'%s' and '%s' must be set at the same time. Got: username '%s' and password '%s'", + USERNAME_OPTION.key(), + PASSWORD_OPTION.key(), + config.getUsername().get(), + config.getPassword().orElse(""))); + } + } + + static void validate(boolean condition, Supplier message) { + if (!condition) { + throw new ValidationException(message.get()); + } + } + + @Override + public String factoryIdentifier() { + return IDENTIFIER; + } + + @Override + public Set> requiredOptions() { + return Stream.of(HOSTS_OPTION, INDEX_OPTION).collect(Collectors.toSet()); + } + + @Override + public Set> optionalOptions() { + return Stream.of( + KEY_DELIMITER_OPTION, + BULK_FLUSH_MAX_SIZE_OPTION, + BULK_FLUSH_MAX_ACTIONS_OPTION, + BULK_FLUSH_INTERVAL_OPTION, + BULK_FLUSH_MAX_BUFFERED_ACTIONS_OPTION, + BULK_FLUSH_MAX_IN_FLIGHT_ACTIONS_OPTION, + CONNECTION_PATH_PREFIX_OPTION, + CONNECTION_REQUEST_TIMEOUT, + CONNECTION_TIMEOUT, + SOCKET_TIMEOUT, + SSL_CERTIFICATE_FINGERPRINT, + FORMAT_OPTION, + DELIVERY_GUARANTEE_OPTION, + PASSWORD_OPTION, + USERNAME_OPTION, + SINK_PARALLELISM, + CACHE_TYPE, + PARTIAL_CACHE_EXPIRE_AFTER_ACCESS, + PARTIAL_CACHE_EXPIRE_AFTER_WRITE, + PARTIAL_CACHE_MAX_ROWS, + PARTIAL_CACHE_CACHE_MISSING_KEY, + MAX_RETRIES) + .collect(Collectors.toSet()); + } + + @Override + public Set> forwardOptions() { + return Stream.of( + HOSTS_OPTION, + 
INDEX_OPTION, + PASSWORD_OPTION, + USERNAME_OPTION, + KEY_DELIMITER_OPTION, + BULK_FLUSH_MAX_ACTIONS_OPTION, + BULK_FLUSH_MAX_SIZE_OPTION, + BULK_FLUSH_INTERVAL_OPTION, + BULK_FLUSH_MAX_BUFFERED_ACTIONS_OPTION, + BULK_FLUSH_MAX_IN_FLIGHT_ACTIONS_OPTION, + CONNECTION_PATH_PREFIX_OPTION, + CONNECTION_REQUEST_TIMEOUT, + CONNECTION_TIMEOUT, + SOCKET_TIMEOUT, + SSL_CERTIFICATE_FINGERPRINT) + .collect(Collectors.toSet()); + } +} diff --git a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/Elasticsearch8Configuration.java b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/Elasticsearch8Configuration.java new file mode 100644 index 00000000..7432250c --- /dev/null +++ b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/Elasticsearch8Configuration.java @@ -0,0 +1,148 @@ +package org.apache.flink.connector.elasticsearch.table; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.configuration.MemorySize; +import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.connector.base.DeliveryGuarantee; +import org.apache.flink.table.api.ValidationException; + +import org.apache.http.HttpHost; + +import java.time.Duration; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.BULK_FLUSH_INTERVAL_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.BULK_FLUSH_MAX_ACTIONS_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.BULK_FLUSH_MAX_BUFFERED_ACTIONS_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.BULK_FLUSH_MAX_IN_FLIGHT_ACTIONS_OPTION; +import static 
org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.BULK_FLUSH_MAX_SIZE_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.CONNECTION_PATH_PREFIX_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.CONNECTION_REQUEST_TIMEOUT; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.CONNECTION_TIMEOUT; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.DELIVERY_GUARANTEE_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.HOSTS_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.INDEX_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.KEY_DELIMITER_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.PASSWORD_OPTION; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.SOCKET_TIMEOUT; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.SSL_CERTIFICATE_FINGERPRINT; +import static org.apache.flink.connector.elasticsearch.table.Elasticsearch8ConnectorOptions.USERNAME_OPTION; +import static org.apache.flink.table.factories.FactoryUtil.SINK_PARALLELISM; +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** Elasticsearch base configuration. 
*/
+@Internal
+public class Elasticsearch8Configuration {
+    /** Connector options that every getter below reads from; never {@code null}. */
+    protected final ReadableConfig config;
+
+    Elasticsearch8Configuration(ReadableConfig config) {
+        this.config = checkNotNull(config);
+    }
+
+    /** Returns the maximum number of actions for each bulk request. */
+    public int getBulkFlushMaxActions() {
+        return config.get(BULK_FLUSH_MAX_ACTIONS_OPTION);
+    }
+
+    /** Returns the maximum buffer length for actions. */
+    public int getBulkFlushMaxBufferedActions() {
+        return config.get(BULK_FLUSH_MAX_BUFFERED_ACTIONS_OPTION);
+    }
+
+    /** Returns the threshold of uncompleted actions before new write actions are blocked. */
+    public int getBulkFlushMaxInFlightActions() {
+        return config.get(BULK_FLUSH_MAX_IN_FLIGHT_ACTIONS_OPTION);
+    }
+
+    /** Returns the maximum size of buffered actions per bulk request. */
+    public MemorySize getBulkFlushMaxByteSize() {
+        return config.get(BULK_FLUSH_MAX_SIZE_OPTION);
+    }
+
+    /** Returns the bulk flush interval, converted to milliseconds. */
+    public long getBulkFlushInterval() {
+        return config.get(BULK_FLUSH_INTERVAL_OPTION).toMillis();
+    }
+
+    /** Returns the configured delivery guarantee. */
+    public DeliveryGuarantee getDeliveryGuarantee() {
+        return config.get(DELIVERY_GUARANTEE_OPTION);
+    }
+
+    /** Returns the user name, if one was configured. */
+    public Optional<String> getUsername() {
+        return config.getOptional(USERNAME_OPTION);
+    }
+
+    /** Returns the password, if one was configured. */
+    public Optional<String> getPassword() {
+        return config.getOptional(PASSWORD_OPTION);
+    }
+
+    /** Returns the index option value. */
+    public String getIndex() {
+        return config.get(INDEX_OPTION);
+    }
+
+    /** Returns the delimiter used between the parts of a composite document id. */
+    public String getKeyDelimiter() {
+        return config.get(KEY_DELIMITER_OPTION);
+    }
+
+    /** Returns the path prefix for REST requests, if one was configured. */
+    public Optional<String> getPathPrefix() {
+        return config.getOptional(CONNECTION_PATH_PREFIX_OPTION);
+    }
+
+    /** Returns the connection request timeout, if one was configured. */
+    public Optional<Duration> getConnectionRequestTimeout() {
+        return config.getOptional(CONNECTION_REQUEST_TIMEOUT);
+    }
+
+    /** Returns the connection timeout, if one was configured. */
+    public Optional<Duration> getConnectionTimeout() {
+        return config.getOptional(CONNECTION_TIMEOUT);
+    }
+
+    /** Returns the socket timeout, if one was configured. */
+    public Optional<Duration> getSocketTimeout() {
+        return config.getOptional(SOCKET_TIMEOUT);
+    }
+
+    /** Returns the SSL certificate fingerprint, if one was configured. */
+    public Optional<String> getCertificateFingerprint() {
+        return config.getOptional(SSL_CERTIFICATE_FINGERPRINT);
+    }
+
+    /**
+     * Returns the configured hosts, each parsed and validated by {@link
+     * #validateAndParseHostsString(String)}.
+     */
+    public List<HttpHost> getHosts() {
+        return config.get(HOSTS_OPTION).stream()
+                .map(Elasticsearch8Configuration::validateAndParseHostsString)
+                .collect(Collectors.toList());
+    }
+
+    /** Returns the sink parallelism, if one was configured. */
+    public Optional<Integer> getParallelism() {
+        return config.getOptional(SINK_PARALLELISM);
+    }
+
+    /**
+     * Parses a single host string into an {@link HttpHost} and validates it.
+     *
+     * <p>The hosts option is given in the following format:
+     *
+     * <pre>
+     *     connector.hosts = http://host_name:9092;http://host_name:9093
+     * </pre>
+     *
+     * <p>The "Missing port" and "Missing scheme" exceptions are raised inside the {@code try}
+     * block, so they are caught below and end up as the cause of the generic exception that is
+     * finally thrown.
+     *
+     * @param host one entry of the hosts option
+     * @return the parsed host
+     * @throws ValidationException if the entry cannot be parsed or fails validation
+     */
+    public static HttpHost validateAndParseHostsString(String host) {
+        try {
+            HttpHost httpHost = HttpHost.create(host);
+            if (httpHost.getPort() < 0) {
+                throw new ValidationException(
+                        String.format(
+                                "Could not parse host '%s' in option '%s'. It should follow the format 'http://host_name:port'. Missing port.",
+                                host, HOSTS_OPTION.key()));
+            }
+
+            if (httpHost.getSchemeName() == null) {
+                throw new ValidationException(
+                        String.format(
+                                "Could not parse host '%s' in option '%s'. It should follow the format 'http://host_name:port'. Missing scheme.",
+                                host, HOSTS_OPTION.key()));
+            }
+            return httpHost;
+        } catch (Exception e) {
+            throw new ValidationException(
+                    String.format(
+                            "Could not parse host '%s' in option '%s'. It should follow the format 'http://host_name:port'.",
+                            host, HOSTS_OPTION.key()),
+                    e);
+        }
+    }
+}
diff --git a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/Elasticsearch8ConnectorOptions.java b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/Elasticsearch8ConnectorOptions.java
new file mode 100644
index 00000000..56c778a7
--- /dev/null
+++ b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/Elasticsearch8ConnectorOptions.java
@@ -0,0 +1,131 @@
+package org.apache.flink.connector.elasticsearch.table;
+
+import org.apache.flink.annotation.PublicEvolving;
+import org.apache.flink.configuration.ConfigOption;
+import org.apache.flink.configuration.ConfigOptions;
+import org.apache.flink.configuration.MemorySize;
+import org.apache.flink.connector.base.DeliveryGuarantee;
+
+import java.time.Duration;
+import java.util.List;
+
+/**
+ * Base options for the Elasticsearch connector. Needs to be public so that the {@link
+ * org.apache.flink.table.api.TableDescriptor} can access it.
+ */
+@PublicEvolving
+public class Elasticsearch8ConnectorOptions {
+
+    Elasticsearch8ConnectorOptions() {}
+
+    // ------------------------------------------------------------------
+    // Connection target and credentials
+    // ------------------------------------------------------------------
+
+    public static final ConfigOption<List<String>> HOSTS_OPTION =
+            ConfigOptions.key("hosts")
+                    .stringType()
+                    .asList()
+                    .noDefaultValue()
+                    .withDescription("Elasticsearch hosts to connect to.");
+
+    public static final ConfigOption<String> INDEX_OPTION =
+            ConfigOptions.key("index")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("Elasticsearch index for every record.");
+
+    public static final ConfigOption<String> PASSWORD_OPTION =
+            ConfigOptions.key("password")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("Password used to connect to Elasticsearch instance.");
+
+    public static final ConfigOption<String> USERNAME_OPTION =
+            ConfigOptions.key("username")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("Username used to connect to Elasticsearch instance.");
+
+    public static final ConfigOption<String> KEY_DELIMITER_OPTION =
+            ConfigOptions.key("document-id.key-delimiter")
+                    .stringType()
+                    .defaultValue("_")
+                    .withDescription(
+                            "Delimiter for composite keys e.g., \"$\" would result in IDs \"KEY1$KEY2$KEY3\".");
+
+    // ------------------------------------------------------------------
+    // Bulk flushing
+    // ------------------------------------------------------------------
+
+    public static final ConfigOption<Integer> BULK_FLUSH_MAX_ACTIONS_OPTION =
+            ConfigOptions.key("sink.bulk-flush.max-actions")
+                    .intType()
+                    .defaultValue(1000)
+                    .withDescription("Maximum number of actions for each bulk request.");
+
+    public static final ConfigOption<Integer> BULK_FLUSH_MAX_BUFFERED_ACTIONS_OPTION =
+            ConfigOptions.key("sink.bulk-flush.max-buffered-actions")
+                    .intType()
+                    .defaultValue(10000)
+                    .withDescription("Maximum buffer length for actions");
+
+    public static final ConfigOption<Integer> BULK_FLUSH_MAX_IN_FLIGHT_ACTIONS_OPTION =
+            ConfigOptions.key("sink.bulk-flush.max-in-flight-actions")
+                    .intType()
+                    .defaultValue(50)
+                    .withDescription(
+                            "Threshold for uncompleted actions before blocking new write actions.");
+
+    public static final ConfigOption<MemorySize> BULK_FLUSH_MAX_SIZE_OPTION =
+            ConfigOptions.key("sink.bulk-flush.max-size")
+                    .memoryType()
+                    .defaultValue(MemorySize.parse("2mb"))
+                    .withDescription("Maximum size of buffered actions per bulk request");
+
+    public static final ConfigOption<Duration> BULK_FLUSH_INTERVAL_OPTION =
+            ConfigOptions.key("sink.bulk-flush.interval")
+                    .durationType()
+                    .defaultValue(Duration.ofSeconds(1))
+                    .withDescription("Bulk flush interval");
+
+    // ------------------------------------------------------------------
+    // HTTP connection tuning
+    // ------------------------------------------------------------------
+
+    public static final ConfigOption<String> CONNECTION_PATH_PREFIX_OPTION =
+            ConfigOptions.key("connection.path-prefix")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription("Prefix string to be added to every REST communication.");
+
+    public static final ConfigOption<Duration> CONNECTION_REQUEST_TIMEOUT =
+            ConfigOptions.key("connection.request-timeout")
+                    .durationType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The timeout for requesting a connection from the connection manager.");
+
+    public static final ConfigOption<Duration> CONNECTION_TIMEOUT =
+            ConfigOptions.key("connection.timeout")
+                    .durationType()
+                    .noDefaultValue()
+                    .withDescription("The timeout for establishing a connection.");
+
+    // The two literals are concatenated, so the separating space has to be part of one of them.
+    public static final ConfigOption<Duration> SOCKET_TIMEOUT =
+            ConfigOptions.key("socket.timeout")
+                    .durationType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The socket timeout (SO_TIMEOUT) for waiting for data or, put differently, "
+                                    + "a maximum period of inactivity between two consecutive data packets.");
+
+    public static final ConfigOption<String> SSL_CERTIFICATE_FINGERPRINT =
+            ConfigOptions.key("ssl.certificate-fingerprint")
+                    .stringType()
+                    .noDefaultValue()
+                    .withDescription(
+                            "The HTTP CA certificate SHA-256 fingerprint used to verify the HTTPS connection.");
+
+    // ------------------------------------------------------------------
+    // Format and delivery
+    // ------------------------------------------------------------------
+
+    public static final ConfigOption<String> FORMAT_OPTION =
+            ConfigOptions.key("format")
+                    .stringType()
+                    .defaultValue("json")
+                    .withDescription(
+                            "The format must produce a valid JSON document. "
+                                    + "Please refer to the documentation on formats for more details.");
+
+    public static final ConfigOption<DeliveryGuarantee> DELIVERY_GUARANTEE_OPTION =
+            ConfigOptions.key("sink.delivery-guarantee")
+                    .enumType(DeliveryGuarantee.class)
+                    .defaultValue(DeliveryGuarantee.AT_LEAST_ONCE)
+                    .withDescription("Optional delivery guarantee when committing.");
+}
diff --git a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/ElasticsearchValidationUtils.java b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/ElasticsearchValidationUtils.java
new file mode 100644
index 00000000..b431fec8
--- /dev/null
+++ b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/ElasticsearchValidationUtils.java
@@ -0,0 +1,74 @@
+package org.apache.flink.connector.elasticsearch.table;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.table.api.ValidationException;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.logical.DistinctType;
+import org.apache.flink.table.types.logical.LogicalTypeFamily;
+import org.apache.flink.table.types.logical.LogicalTypeRoot;
+
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/** Utility methods for validating Elasticsearch properties.
*/
+@Internal
+class ElasticsearchValidationUtils {
+    /** Type roots that may appear in a primary key; anything else is rejected. */
+    private static final Set<LogicalTypeRoot> ALLOWED_PRIMARY_KEY_TYPES = new LinkedHashSet<>();
+
+    static {
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.CHAR);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.VARCHAR);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.BOOLEAN);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.DECIMAL);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.TINYINT);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.SMALLINT);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.INTEGER);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.BIGINT);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.FLOAT);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.DOUBLE);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.DATE);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.TIME_WITHOUT_TIME_ZONE);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.INTERVAL_YEAR_MONTH);
+        ALLOWED_PRIMARY_KEY_TYPES.add(LogicalTypeRoot.INTERVAL_DAY_TIME);
+    }
+
+    /**
+     * Checks that the table does not have a primary key defined on illegal types. In Elasticsearch
+     * the primary key is used to calculate the Elasticsearch document id, which is a string of up
+     * to 512 bytes. It cannot have whitespaces. As of now it is calculated by concatenating the
+     * fields. Certain types do not have a good string representation to be used in this scenario.
+     * The illegal types are mostly {@link LogicalTypeFamily#COLLECTION} types and {@link
+     * LogicalTypeRoot#RAW} type.
+     *
+     * <p>A distinct type is judged by the type root of its source type.
+     *
+     * @param primaryKeyDataType data type whose fields are the primary key columns
+     * @throws ValidationException if at least one key column has a type root that is not allowed
+     */
+    public static void validatePrimaryKey(DataType primaryKeyDataType) {
+        List<DataType> fieldDataTypes = DataType.getFieldDataTypes(primaryKeyDataType);
+        List<LogicalTypeRoot> illegalTypes =
+                fieldDataTypes.stream()
+                        .map(DataType::getLogicalType)
+                        .map(
+                                logicalType -> {
+                                    if (logicalType.is(LogicalTypeRoot.DISTINCT_TYPE)) {
+                                        return ((DistinctType) logicalType)
+                                                .getSourceType()
+                                                .getTypeRoot();
+                                    } else {
+                                        return logicalType.getTypeRoot();
+                                    }
+                                })
+                        .filter(t -> !ALLOWED_PRIMARY_KEY_TYPES.contains(t))
+                        .collect(Collectors.toList());
+        if (!illegalTypes.isEmpty()) {
+            throw new ValidationException(
+                    String.format(
+                            "The table has a primary key on columns of illegal types: %s.",
+                            illegalTypes));
+        }
+    }
+
+    private ElasticsearchValidationUtils() {}
+}
diff --git a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/IndexGenerator.java b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/IndexGenerator.java
new file mode 100644
index 00000000..12718f55
--- /dev/null
+++ b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/IndexGenerator.java
@@ -0,0 +1,21 @@
+package org.apache.flink.connector.elasticsearch.table;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.types.Row;
+
+import java.io.Serializable;
+
+/** This interface is responsible to generate index name from given {@link Row} record. */
+@Internal
+interface IndexGenerator extends Serializable {
+
+    /**
+     * Initialize the index generator, this will be called only once before {@link
+     * #generate(RowData)} is called.
+     */
+    default void open() {}
+
+    /** Generate index name according to the given row.
*/
+    String generate(RowData row);
+}
diff --git a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/IndexGeneratorBase.java b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/IndexGeneratorBase.java
new file mode 100644
index 00000000..9af36ee6
--- /dev/null
+++ b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/IndexGeneratorBase.java
@@ -0,0 +1,34 @@
+package org.apache.flink.connector.elasticsearch.table;
+
+import org.apache.flink.annotation.Internal;
+
+import java.util.Objects;
+
+/**
+ * Base class for {@link IndexGenerator}. It stores the index option value and defines equality
+ * and hashing on that value alone.
+ */
+@Internal
+public abstract class IndexGeneratorBase implements IndexGenerator {
+
+    private static final long serialVersionUID = 1L;
+
+    /** The index option value exactly as passed to the constructor. */
+    protected final String index;
+
+    public IndexGeneratorBase(String index) {
+        this.index = index;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (!(o instanceof IndexGeneratorBase)) {
+            return false;
+        }
+        IndexGeneratorBase that = (IndexGeneratorBase) o;
+        // Null-safe, like hashCode(): the constructor does not reject a null index.
+        return Objects.equals(index, that.index);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(index);
+    }
+}
diff --git a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/IndexGeneratorFactory.java b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/IndexGeneratorFactory.java
new file mode 100644
index 00000000..2bf651ee
--- /dev/null
+++ b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/IndexGeneratorFactory.java
@@ -0,0 +1,298 @@
+package org.apache.flink.connector.elasticsearch.table;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.table.api.TableException;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.TimestampData;
+import org.apache.flink.table.types.DataType;
+import
org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.table.types.logical.LogicalTypeRoot;
+
+import javax.annotation.Nonnull;
+
+import java.io.Serializable;
+import java.time.LocalDate;
+import java.time.LocalTime;
+import java.time.ZoneId;
+import java.time.ZonedDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Factory of {@link IndexGenerator}.
+ *
+ * <p>Flink supports both static index and dynamic index.
+ *
+ * <p>If you want to have a static index, this option value should be a plain string, e.g.
+ * 'myusers', all the records will be consistently written into "myusers" index.
+ *
+ * <p>If you want to have a dynamic index, you can use '{field_name}' to reference a field value in
+ * the record to dynamically generate a target index. You can also use
+ * '{field_name|date_format_string}' to convert a field value of TIMESTAMP/DATE/TIME type into the
+ * format specified by date_format_string. The date_format_string is a {@link DateTimeFormatter}
+ * pattern. For example, if the option value is 'myusers_{log_ts|yyyy-MM-dd}', then a record with
+ * log_ts field value 2020-03-27 12:25:55 will be written into "myusers_2020-03-27" index.
+ *
+ * <p>'{now()|date_format_string}' and '{current_timestamp|date_format_string}' format the current
+ * time in the given time zone instead of a field value.
+ */
+@Internal
+final class IndexGeneratorFactory {
+
+    private IndexGeneratorFactory() {}
+
+    /**
+     * Creates the generator for the given index option value.
+     *
+     * @param index index option value, either a plain name or a name with one '{...}' pattern
+     * @param fieldNames names of the table fields, used to resolve the referenced field
+     * @param dataTypes data types of the table fields, in the same order as {@code fieldNames}
+     * @param localTimeZoneId zone used for system time and for TIMESTAMP_LTZ fields
+     * @return a static generator if the value has no pattern, a dynamic one otherwise
+     * @throws TableException if the value has more than one pattern or names an unknown field
+     */
+    public static IndexGenerator createIndexGenerator(
+            String index,
+            List<String> fieldNames,
+            List<DataType> dataTypes,
+            ZoneId localTimeZoneId) {
+        final IndexHelper indexHelper = new IndexHelper();
+        if (indexHelper.checkIsDynamicIndex(index)) {
+            return createRuntimeIndexGenerator(
+                    index,
+                    fieldNames.toArray(new String[0]),
+                    dataTypes.toArray(new DataType[0]),
+                    indexHelper,
+                    localTimeZoneId);
+        } else {
+            return new StaticIndexGenerator(index);
+        }
+    }
+
+    /** Same as the four-argument overload, using {@link ZoneId#systemDefault()} as the zone. */
+    public static IndexGenerator createIndexGenerator(
+            String index, List<String> fieldNames, List<DataType> dataTypes) {
+        return createIndexGenerator(index, fieldNames, dataTypes, ZoneId.systemDefault());
+    }
+
+    /** Formats a non-null internal field value with the given formatter. */
+    interface DynamicFormatter extends Serializable {
+        String format(@Nonnull Object fieldValue, DateTimeFormatter formatter);
+    }
+
+    private static IndexGenerator createRuntimeIndexGenerator(
+            String index,
+            String[] fieldNames,
+            DataType[] fieldTypes,
+            IndexHelper indexHelper,
+            ZoneId localTimeZoneId) {
+        // Split the option value into "prefix{pattern}suffix".
+        final String dynamicIndexPatternStr = indexHelper.extractDynamicIndexPatternStr(index);
+        final String indexPrefix = index.substring(0, index.indexOf(dynamicIndexPatternStr));
+        final String indexSuffix =
+                index.substring(indexPrefix.length() + dynamicIndexPatternStr.length());
+
+        if (indexHelper.checkIsDynamicIndexWithSystemTimeFormat(index)) {
+            final String dateTimeFormat =
+                    indexHelper.extractDateFormat(
+                            index, LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE);
+            return new AbstractTimeIndexGenerator(index, dateTimeFormat) {
+                @Override
+                public String generate(RowData row) {
+                    // A zoned value is required here: the default TIMESTAMP_LTZ format ends in
+                    // 'X' (zone offset), which a LocalDateTime cannot supply. Patterns without
+                    // zone letters print the same text as before.
+                    return indexPrefix
+                            .concat(ZonedDateTime.now(localTimeZoneId).format(dateTimeFormatter))
+                            .concat(indexSuffix);
+                }
+            };
+        }
+
+        final boolean isDynamicIndexWithFormat = indexHelper.checkIsDynamicIndexWithFormat(index);
+        final int indexFieldPos =
+                indexHelper.extractIndexFieldPos(index, fieldNames, isDynamicIndexWithFormat);
+        final LogicalType indexFieldType = fieldTypes[indexFieldPos].getLogicalType();
+        final LogicalTypeRoot indexFieldLogicalTypeRoot = indexFieldType.getTypeRoot();
+
+        // validate index field type
+        indexHelper.validateIndexFieldType(indexFieldLogicalTypeRoot);
+
+        // time extract dynamic index pattern
+        final RowData.FieldGetter fieldGetter =
+                RowData.createFieldGetter(indexFieldType, indexFieldPos);
+
+        if (isDynamicIndexWithFormat) {
+            final String dateTimeFormat =
+                    indexHelper.extractDateFormat(index, indexFieldLogicalTypeRoot);
+            DynamicFormatter formatFunction =
+                    createFormatFunction(
+                            indexFieldType, indexFieldLogicalTypeRoot, localTimeZoneId);
+
+            return new AbstractTimeIndexGenerator(index, dateTimeFormat) {
+                @Override
+                public String generate(RowData row) {
+                    Object fieldOrNull = fieldGetter.getFieldOrNull(row);
+                    final String formattedField;
+                    if (fieldOrNull != null) {
+                        formattedField = formatFunction.format(fieldOrNull, dateTimeFormatter);
+                    } else {
+                        formattedField = "null";
+                    }
+                    return indexPrefix.concat(formattedField).concat(indexSuffix);
+                }
+            };
+        }
+        // general dynamic index pattern
+        return new IndexGeneratorBase(index) {
+            @Override
+            public String generate(RowData row) {
+                Object indexField = fieldGetter.getFieldOrNull(row);
+                return indexPrefix
+                        .concat(indexField == null ? "null" : indexField.toString())
+                        .concat(indexSuffix);
+            }
+        };
+    }
+
+    /**
+     * Returns the function that turns the internal value of a time-related field into text.
+     *
+     * @throws UnsupportedOperationException for TIMESTAMP_WITH_TIME_ZONE
+     * @throws TableException for any type root that is not time-related
+     */
+    private static DynamicFormatter createFormatFunction(
+            LogicalType indexFieldType,
+            LogicalTypeRoot indexFieldLogicalTypeRoot,
+            ZoneId localTimeZoneId) {
+        switch (indexFieldLogicalTypeRoot) {
+            case DATE:
+                return (value, dateTimeFormatter) -> {
+                    Integer indexField = (Integer) value;
+                    return LocalDate.ofEpochDay(indexField).format(dateTimeFormatter);
+                };
+            case TIME_WITHOUT_TIME_ZONE:
+                return (value, dateTimeFormatter) -> {
+                    Integer indexField = (Integer) value;
+                    return LocalTime.ofNanoOfDay(indexField * 1_000_000L).format(dateTimeFormatter);
+                };
+            case TIMESTAMP_WITHOUT_TIME_ZONE:
+                return (value, dateTimeFormatter) -> {
+                    TimestampData indexField = (TimestampData) value;
+                    return indexField.toLocalDateTime().format(dateTimeFormatter);
+                };
+            case TIMESTAMP_WITH_TIME_ZONE:
+                throw new UnsupportedOperationException(
+                        "TIMESTAMP_WITH_TIME_ZONE is not supported yet");
+            case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
+                return (value, dateTimeFormatter) -> {
+                    TimestampData indexField = (TimestampData) value;
+                    return indexField.toInstant().atZone(localTimeZoneId).format(dateTimeFormatter);
+                };
+            default:
+                throw new TableException(
+                        String.format(
+                                "Unsupported type '%s' found in Elasticsearch dynamic index field, "
+                                        + "time-related pattern only support types are: DATE,TIME,TIMESTAMP.",
+                                indexFieldType));
+        }
+    }
+
+    /**
+     * Helper class for {@link IndexGeneratorFactory}, this helper can be used to validate the index
+     * field type and parse the index format from the pattern.
+     */
+    static class IndexHelper {
+        private static final Pattern dynamicIndexPattern = Pattern.compile("\\{[^\\{\\}]+\\}?");
+        private static final Pattern dynamicIndexTimeExtractPattern =
+                Pattern.compile(".*\\{.+\\|.*\\}.*");
+        private static final Pattern dynamicIndexSystemTimeExtractPattern =
+                Pattern.compile(
+                        ".*\\{\\s*(now\\(\\s*\\)|NOW\\(\\s*\\)|current_timestamp|CURRENT_TIMESTAMP)\\s*\\|.*\\}.*");
+        private static final List<LogicalTypeRoot> supportedTypes = new ArrayList<>();
+        private static final Map<LogicalTypeRoot, String> defaultFormats = new HashMap<>();
+
+        static {
+            // time related types
+            supportedTypes.add(LogicalTypeRoot.DATE);
+            supportedTypes.add(LogicalTypeRoot.TIME_WITHOUT_TIME_ZONE);
+            supportedTypes.add(LogicalTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE);
+            supportedTypes.add(LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE);
+            supportedTypes.add(LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE);
+            // general types
+            supportedTypes.add(LogicalTypeRoot.VARCHAR);
+            supportedTypes.add(LogicalTypeRoot.CHAR);
+            supportedTypes.add(LogicalTypeRoot.TINYINT);
+            supportedTypes.add(LogicalTypeRoot.INTEGER);
+            supportedTypes.add(LogicalTypeRoot.BIGINT);
+        }
+
+        static {
+            defaultFormats.put(LogicalTypeRoot.DATE, "yyyy_MM_dd");
+            defaultFormats.put(LogicalTypeRoot.TIME_WITHOUT_TIME_ZONE, "HH_mm_ss");
+            defaultFormats.put(LogicalTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE, "yyyy_MM_dd_HH_mm_ss");
+            defaultFormats.put(LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE, "yyyy_MM_dd_HH_mm_ss");
+            defaultFormats.put(
+                    LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE, "yyyy_MM_dd_HH_mm_ssX");
+        }
+
+        /** Validate the index field Type. */
+        void validateIndexFieldType(LogicalTypeRoot logicalType) {
+            if (!supportedTypes.contains(logicalType)) {
+                throw new IllegalArgumentException(
+                        String.format(
+                                "Unsupported type %s of index field, " + "Supported types are: %s",
+                                logicalType, supportedTypes));
+            }
+        }
+
+        /** Get the default date format, or {@code null} if the type has none. */
+        String getDefaultFormat(LogicalTypeRoot logicalType) {
+            return defaultFormats.get(logicalType);
+        }
+
+        /**
+         * Check general dynamic index is enabled or not by index pattern.
+         *
+         * @throws TableException if the index contains more than one dynamic pattern
+         */
+        boolean checkIsDynamicIndex(String index) {
+            final Matcher matcher = dynamicIndexPattern.matcher(index);
+            int count = 0;
+            while (matcher.find()) {
+                count++;
+            }
+            if (count > 1) {
+                throw new TableException(
+                        String.format(
+                                "Chaining dynamic index pattern %s is not supported,"
+                                        + " only support single dynamic index pattern.",
+                                index));
+            }
+            return count == 1;
+        }
+
+        /** Check time extract dynamic index is enabled or not by index pattern. */
+        boolean checkIsDynamicIndexWithFormat(String index) {
+            return dynamicIndexTimeExtractPattern.matcher(index).matches();
+        }
+
+        /** Check generate dynamic index is from system time or not. */
+        boolean checkIsDynamicIndexWithSystemTimeFormat(String index) {
+            return dynamicIndexSystemTimeExtractPattern.matcher(index).matches();
+        }
+
+        /** Extract dynamic index pattern string from index pattern string. */
+        String extractDynamicIndexPatternStr(String index) {
+            int start = index.indexOf("{");
+            int end = index.lastIndexOf("}");
+            return index.substring(start, end + 1);
+        }
+
+        /**
+         * Extract index field position in a fieldNames, return the field position.
+         *
+         * @throws TableException if the referenced field is not one of {@code fieldNames}
+         */
+        int extractIndexFieldPos(
+                String index, String[] fieldNames, boolean isDynamicIndexWithFormat) {
+            // The field name ends at '|' when a format follows it, at '}' otherwise.
+            final String nameEnd = isDynamicIndexWithFormat ? "|" : "}";
+            final String indexFieldName =
+                    index.substring(index.indexOf("{") + 1, index.indexOf(nameEnd));
+            final int indexFieldPos = Arrays.asList(fieldNames).indexOf(indexFieldName);
+            if (indexFieldPos < 0) {
+                throw new TableException(
+                        String.format(
+                                "Unknown field '%s' in index pattern '%s', please check the field name.",
+                                indexFieldName, index));
+            }
+            return indexFieldPos;
+        }
+
+        /**
+         * Extract dateTime format by the date format that extracted from index pattern string.
+         * An empty format falls back to the default format of the given type.
+         */
+        private String extractDateFormat(String index, LogicalTypeRoot logicalType) {
+            String format = index.substring(index.indexOf("|") + 1, index.indexOf("}"));
+            if ("".equals(format)) {
+                format = getDefaultFormat(logicalType);
+            }
+            return format;
+        }
+    }
+}
diff --git a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/KeyExtractor.java b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/KeyExtractor.java
new file mode 100644
index 00000000..2dda12af
--- /dev/null
+++ b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/KeyExtractor.java
@@ -0,0 +1,79 @@
+package org.apache.flink.connector.elasticsearch.table;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.types.logical.DistinctType;
+import org.apache.flink.table.types.logical.LogicalType;
+import org.apache.flink.util.function.SerializableFunction;
+
+import java.io.Serializable;
+import java.time.Duration;
+import java.time.LocalDate;
+import java.time.LocalTime;
+import java.time.Period;
+import java.util.List;
+
+/** An extractor for a Elasticsearch key from a {@link RowData}.
*/
+@Internal
+class KeyExtractor implements SerializableFunction<RowData, String> {
+    private static final long serialVersionUID = 1L;
+
+    /** One formatter per primary key column, in key order. */
+    private final FieldFormatter[] fieldFormatters;
+
+    /** Text placed between the formatted key columns. */
+    private final String keyDelimiter;
+
+    /** Renders one key column of a row as text. */
+    private interface FieldFormatter extends Serializable {
+        String format(RowData rowData);
+    }
+
+    private KeyExtractor(FieldFormatter[] fieldFormatters, String keyDelimiter) {
+        this.fieldFormatters = fieldFormatters;
+        this.keyDelimiter = keyDelimiter;
+    }
+
+    /** Joins the formatted key columns of the row with the key delimiter. */
+    @Override
+    public String apply(RowData rowData) {
+        final StringBuilder builder = new StringBuilder();
+        for (int i = 0; i < fieldFormatters.length; i++) {
+            if (i > 0) {
+                builder.append(keyDelimiter);
+            }
+            final String value = fieldFormatters[i].format(rowData);
+            builder.append(value);
+        }
+        return builder.toString();
+    }
+
+    /**
+     * Creates the key extractor for the given primary key columns.
+     *
+     * @param primaryKeyTypesWithIndex position and logical type of every primary key column
+     * @param keyDelimiter text placed between the formatted key columns
+     * @return an extractor that builds the key, or one that always returns {@code null} when the
+     *     list is empty
+     */
+    public static SerializableFunction<RowData, String> createKeyExtractor(
+            List<LogicalTypeWithIndex> primaryKeyTypesWithIndex, String keyDelimiter) {
+        if (!primaryKeyTypesWithIndex.isEmpty()) {
+            FieldFormatter[] formatters =
+                    primaryKeyTypesWithIndex.stream()
+                            .map(
+                                    logicalTypeWithIndex ->
+                                            toFormatter(
+                                                    logicalTypeWithIndex.index,
+                                                    logicalTypeWithIndex.logicalType))
+                            .toArray(FieldFormatter[]::new);
+            return new KeyExtractor(formatters, keyDelimiter);
+        } else {
+            return (row) -> null;
+        }
+    }
+
+    /** Picks the formatter for one key column; a distinct type is formatted as its source type. */
+    private static FieldFormatter toFormatter(int index, LogicalType type) {
+        switch (type.getTypeRoot()) {
+            case DATE:
+                return (row) -> LocalDate.ofEpochDay(row.getInt(index)).toString();
+            case TIME_WITHOUT_TIME_ZONE:
+                return (row) ->
+                        LocalTime.ofNanoOfDay((long) row.getInt(index) * 1_000_000L).toString();
+            case INTERVAL_YEAR_MONTH:
+                // NOTE(review): Flink presumably stores a year-month interval as a number of
+                // months, yet it is rendered as days here. Confirm before changing: a fix alters
+                // the generated document ids.
+                return (row) -> Period.ofDays(row.getInt(index)).toString();
+            case INTERVAL_DAY_TIME:
+                return (row) -> Duration.ofMillis(row.getLong(index)).toString();
+            case DISTINCT_TYPE:
+                return toFormatter(index, ((DistinctType) type).getSourceType());
+            default:
+                RowData.FieldGetter fieldGetter = RowData.createFieldGetter(type, index);
+                // Throws a NullPointerException if the key column holds null.
+                return row -> fieldGetter.getFieldOrNull(row).toString();
+        }
+    }
+}
diff --git
a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/LogicalTypeWithIndex.java b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/LogicalTypeWithIndex.java
new file mode 100644
index 00000000..7891e50a
--- /dev/null
+++ b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/LogicalTypeWithIndex.java
@@ -0,0 +1,13 @@
+package org.apache.flink.connector.elasticsearch.table;
+
+import org.apache.flink.table.types.logical.LogicalType;
+
+/** Immutable pair of a field position and the logical type of that field. */
+class LogicalTypeWithIndex {
+    /** Position of the field. */
+    public final int index;
+    /** Logical type of the field at {@link #index}. */
+    public final LogicalType logicalType;
+
+    LogicalTypeWithIndex(int index, LogicalType logicalType) {
+        this.index = index;
+        this.logicalType = logicalType;
+    }
+}
diff --git a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/RowDataElementConverter.java b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/RowDataElementConverter.java
new file mode 100644
index 00000000..a1e499ed
--- /dev/null
+++ b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/RowDataElementConverter.java
@@ -0,0 +1,81 @@
+package org.apache.flink.connector.elasticsearch.table;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.api.connector.sink2.SinkWriter;
+import org.apache.flink.api.connector.sink2.WriterInitContext;
+import org.apache.flink.connector.base.sink.writer.ElementConverter;
+import org.apache.flink.table.api.TableException;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.types.DataType;
+
+import co.elastic.clients.elasticsearch.core.bulk.BulkOperationVariant;
+import co.elastic.clients.elasticsearch.core.bulk.DeleteOperation;
+import co.elastic.clients.elasticsearch.core.bulk.IndexOperation;
+import co.elastic.clients.elasticsearch.core.bulk.UpdateOperation;
+
+import java.util.Map;
+import
java.util.function.Function;
+
+/**
+ * Implementation of an {@link ElementConverter} for the ElasticSearch Table sink. The element
+ * converter maps the Flink internal type of {@link RowData} to a {@link BulkOperationVariant} to be
+ * used by Elasticsearch Java API
+ */
+@Internal
+public class RowDataElementConverter implements ElementConverter<RowData, BulkOperationVariant> {
+    private static final long serialVersionUID = 1L;
+
+    /** Produces the target index name for a row. */
+    private final IndexGenerator indexGenerator;
+
+    /** Produces the document id for a row. */
+    private final Function<RowData, String> keyExtractor;
+
+    /** Turns a row into the document body. */
+    private final RowDataToMapConverter rowDataToMapConverter;
+
+    public RowDataElementConverter(
+            DataType physicalDataType,
+            IndexGenerator indexGenerator,
+            Function<RowData, String> keyExtractor) {
+        this.rowDataToMapConverter = new RowDataToMapConverter(physicalDataType);
+        this.indexGenerator = indexGenerator;
+        this.keyExtractor = keyExtractor;
+    }
+
+    @Override
+    public void open(WriterInitContext context) {
+        indexGenerator.open();
+    }
+
+    /**
+     * Maps a changelog row to a bulk operation: INSERT becomes an index operation, UPDATE_AFTER an
+     * update with upsert, UPDATE_BEFORE and DELETE a delete operation.
+     *
+     * @throws TableException if the row kind is none of the four above
+     */
+    @Override
+    public BulkOperationVariant apply(RowData rowData, SinkWriter.Context context) {
+        BulkOperationVariant operation;
+
+        switch (rowData.getRowKind()) {
+            case INSERT:
+                operation =
+                        new IndexOperation.Builder<>()
+                                .index(indexGenerator.generate(rowData))
+                                .id(keyExtractor.apply(rowData))
+                                .document(rowDataToMapConverter.toMap(rowData))
+                                .build();
+                break;
+            case UPDATE_AFTER:
+                final Map<String, Object> upsertDocument = rowDataToMapConverter.toMap(rowData);
+                operation =
+                        new UpdateOperation.Builder<>()
+                                .index(indexGenerator.generate(rowData))
+                                .id(keyExtractor.apply(rowData))
+                                .action(a -> a.doc(upsertDocument).docAsUpsert(true))
+                                .build();
+                break;
+            case UPDATE_BEFORE:
+            case DELETE:
+                // A delete carries only index and id, so the row is not converted to a document.
+                operation =
+                        new DeleteOperation.Builder()
+                                .index(indexGenerator.generate(rowData))
+                                .id(keyExtractor.apply(rowData))
+                                .build();
+                break;
+            default:
+                throw new TableException("Unsupported message kind: " + rowData.getRowKind());
+        }
+
+        return operation;
+    }
+}
diff --git a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/RowDataToMapConverter.java
b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/RowDataToMapConverter.java
new file mode 100644
index 00000000..aa5fc098
--- /dev/null
+++ b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/RowDataToMapConverter.java
@@ -0,0 +1,44 @@
+package org.apache.flink.connector.elasticsearch.table;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.conversion.DataStructureConverter;
+import org.apache.flink.table.data.conversion.DataStructureConverters;
+import org.apache.flink.table.types.DataType;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/** Tool class used to convert from {@link RowData} to {@link Map}. */
+@Internal
+public class RowDataToMapConverter implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    private final DataType physicalDataType;
+
+    /**
+     * Per-field accessors derived from {@link #physicalDataType}. Built on the first call to
+     * {@link #toMap(RowData)} and excluded from serialization, so a deserialized copy rebuilds
+     * them.
+     */
+    private transient List<FieldAccessor> fieldAccessors;
+
+    public RowDataToMapConverter(DataType physicalDataType) {
+        this.physicalDataType = physicalDataType;
+    }
+
+    /**
+     * Converts a row into a map from field name to the external (Java) value of that field.
+     *
+     * @param rowData row laid out according to the physical data type given to the constructor
+     * @return a new mutable map with one entry per top-level field; values may be {@code null}
+     */
+    public Map<String, Object> toMap(RowData rowData) {
+        if (fieldAccessors == null) {
+            // The accessors depend only on the data type, so they are built once, not per row.
+            fieldAccessors = createFieldAccessors(physicalDataType);
+        }
+
+        final Map<String, Object> map = new HashMap<>(fieldAccessors.size());
+        for (FieldAccessor accessor : fieldAccessors) {
+            map.put(accessor.name, accessor.readExternalOrNull(rowData));
+        }
+        return map;
+    }
+
+    /** Builds one accessor per top-level field of the given data type, in field order. */
+    private static List<FieldAccessor> createFieldAccessors(DataType dataType) {
+        final List<DataTypes.Field> fields = DataType.getFields(dataType);
+        final List<FieldAccessor> accessors = new ArrayList<>(fields.size());
+        for (int i = 0; i < fields.size(); i++) {
+            final DataTypes.Field field = fields.get(i);
+            // NOTE(review): as before, the converter is used without open(ClassLoader) being
+            // called on it; confirm whether nested or structured types need that call.
+            accessors.add(
+                    new FieldAccessor(
+                            field.getName(),
+                            RowData.createFieldGetter(field.getDataType().getLogicalType(), i),
+                            DataStructureConverters.getConverter(field.getDataType())));
+        }
+        return accessors;
+    }
+
+    /** Name, getter and converter of one top-level field. */
+    private static final class FieldAccessor {
+        private final String name;
+        private final RowData.FieldGetter getter;
+        private final DataStructureConverter<Object, Object> converter;
+
+        private FieldAccessor(
+                String name,
+                RowData.FieldGetter getter,
+                DataStructureConverter<Object, Object> converter) {
+            this.name = name;
+            this.getter = getter;
+            this.converter = converter;
+        }
+
+        /** Reads the field from the row and converts it to its external representation. */
+        private Object readExternalOrNull(RowData rowData) {
+            return converter.toExternalOrNull(getter.getFieldOrNull(rowData));
+        }
+    }
+}
diff --git a/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/StaticIndexGenerator.java b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/StaticIndexGenerator.java
new file mode 100644
index 00000000..470ef6c1
--- /dev/null
+++
b/flink-connector-elasticsearch8/src/main/java/org/apache/flink/connector/elasticsearch/table/StaticIndexGenerator.java
@@ -0,0 +1,17 @@
+package org.apache.flink.connector.elasticsearch.table;
+
+import org.apache.flink.annotation.Internal;
+import org.apache.flink.table.data.RowData;
+
+/** A static {@link IndexGenerator} which generate fixed index name. */
+@Internal
+final class StaticIndexGenerator extends IndexGeneratorBase {
+
+    public StaticIndexGenerator(String index) {
+        super(index);
+    }
+
+    /** Returns the configured index name unchanged; the row is not inspected. */
+    @Override
+    public String generate(RowData row) {
+        return index;
+    }
+}
diff --git a/flink-connector-elasticsearch8/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory b/flink-connector-elasticsearch8/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory
new file mode 100644
index 00000000..de87735b
--- /dev/null
+++ b/flink-connector-elasticsearch8/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +org.apache.flink.connector.elasticsearch.table.ElasticSearch8AsyncDynamicTableFactory + diff --git a/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/sink/Elasticsearch8AsyncWriterITCase.java b/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/sink/Elasticsearch8AsyncWriterITCase.java index c401e761..d65ae612 100644 --- a/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/sink/Elasticsearch8AsyncWriterITCase.java +++ b/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/sink/Elasticsearch8AsyncWriterITCase.java @@ -216,9 +216,13 @@ private NetworkConfig createNetworkConfig() { ES_CLUSTER_USERNAME, ES_CLUSTER_PASSWORD, null, + null, + null, + null, + null, () -> ES_CONTAINER_SECURE.createSslContextFromCa(), null) - : new NetworkConfig(esHost, null, null, null, null, null); + : new NetworkConfig(esHost, null, null, null, null, null, null, null, null, null); } private Elasticsearch8AsyncWriter createWriter( diff --git a/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/table/Elasticsearch8DynamicSinkBaseITCase.java b/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/table/Elasticsearch8DynamicSinkBaseITCase.java new file mode 100644 index 00000000..7ecde01b --- /dev/null +++ b/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/table/Elasticsearch8DynamicSinkBaseITCase.java @@ -0,0 +1,304 @@ +package org.apache.flink.connector.elasticsearch.table; + +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.connector.elasticsearch.sink.NetworkConfig; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.testutils.junit.extensions.parameterized.Parameter; 
+import org.apache.flink.testutils.junit.extensions.parameterized.ParameterizedTestExtension; +import org.apache.flink.testutils.junit.extensions.parameterized.Parameters; + +import co.elastic.clients.elasticsearch.ElasticsearchAsyncClient; +import co.elastic.clients.elasticsearch.core.GetRequest; +import co.elastic.clients.elasticsearch.core.SearchRequest; +import co.elastic.clients.transport.TransportUtils; +import org.apache.commons.codec.binary.Hex; +import org.apache.http.HttpHost; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.platform.commons.util.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.containers.wait.strategy.LogMessageWaitStrategy; +import org.testcontainers.containers.wait.strategy.Wait; +import org.testcontainers.elasticsearch.ElasticsearchContainer; +import org.testcontainers.utility.DockerImageName; + +import java.io.IOException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.security.cert.CertificateException; +import java.security.cert.CertificateFactory; +import java.security.cert.X509Certificate; +import java.time.Duration; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; + +import static org.apache.flink.connector.elasticsearch.table.TestContext.context; +import static org.assertj.core.api.Assertions.assertThat; + +/** + * {@link Elasticsearch8DynamicSinkBaseITCase} is the base class for integration tests. + * + *

It is extended with the {@link ParameterizedTestExtension} for parameterized testing against + * secure and non-secure Elasticsearch clusters. Tests must be annotated by {@link TestTemplate} in + * order to be parameterized. + * + *

The cluster is running via test containers. In order to reuse the singleton containers by all + * inheriting test classes, we manage their lifecycle. The two containers are started only once when + * this class is loaded. At the end of the test suite the Ryuk container that is started by + * Testcontainers core will take care of stopping the singleton containers. + */ +@ExtendWith(ParameterizedTestExtension.class) +abstract class Elasticsearch8DynamicSinkBaseITCase { + private static final Logger LOG = + LoggerFactory.getLogger(Elasticsearch8DynamicSinkBaseITCase.class); + + public static final String ELASTICSEARCH_VERSION = "8.12.1"; + public static final DockerImageName ELASTICSEARCH_IMAGE = + DockerImageName.parse("docker.elastic.co/elasticsearch/elasticsearch") + .withTag(ELASTICSEARCH_VERSION); + protected static final String ES_CLUSTER_USERNAME = "elastic"; + protected static final String ES_CLUSTER_PASSWORD = "s3cret"; + protected static final ElasticsearchContainer ES_CONTAINER = createElasticsearchContainer(); + protected static final ElasticsearchContainer ES_CONTAINER_SECURE = + createSecureElasticsearchContainer(); + + protected static String certificateFingerprint = null; + + // Use singleton test containers which are only started once for several test classes. + // There is no special support for this use case provided by the Testcontainers + // extension @Testcontainers. + static { + ES_CONTAINER.start(); + ES_CONTAINER_SECURE.start(); + } + + @Parameter public boolean secure; + + protected ElasticsearchAsyncClient client; + + @Parameters(name = "ES secured = {0}") + public static List secureEnabled() { + return Arrays.asList(false, true); + } + + @BeforeEach + public void setUpBase() { + LOG.info("Setting up elasticsearch client, host: {}, secure: {}", getHost(), secure); + certificateFingerprint = secure ? getEsCertFingerprint() : null; + assertThat(secure).isEqualTo(StringUtils.isNotBlank(certificateFingerprint)); /* a non-blank fingerprint must exist exactly when the secured cluster is under test */ + client = secure ?
createSecureElasticsearchClient() : createElasticsearchClient(); + } + + @AfterEach + public void shutdownBase() throws IOException { + client.shutdown(); + } + + private String getEsCertFingerprint() { + if (!ES_CONTAINER_SECURE.caCertAsBytes().isPresent()) { + LOG.error("cannot get the CA cert from the docker container."); + return null; + } + + byte[] caCertBytes = ES_CONTAINER_SECURE.caCertAsBytes().get(); + + CertificateFactory cf; + byte[] fingerprintBytes = new byte[0]; /* stays empty when the digest below fails, so an empty hex string is returned */ + try { + cf = CertificateFactory.getInstance("X.509"); + X509Certificate caCert = + (X509Certificate) + cf.generateCertificate(new java.io.ByteArrayInputStream(caCertBytes)); + + MessageDigest md = MessageDigest.getInstance("SHA-256"); + fingerprintBytes = md.digest(caCert.getEncoded()); + } catch (CertificateException | NoSuchAlgorithmException e) { + LOG.error("failed to compute certificate fingerprint: ", e); + } + + return Hex.encodeHexString(fingerprintBytes); + } + + TestContext getPrefilledTestContext(String index) { + TestContext testContext = + context() + .withOption(Elasticsearch8ConnectorOptions.INDEX_OPTION.key(), index) + .withOption( + Elasticsearch8ConnectorOptions.HOSTS_OPTION.key(), + secure + ?
"https://" + ES_CONTAINER_SECURE.getHttpHostAddress() + : "http://" + ES_CONTAINER.getHttpHostAddress()); + if (secure) { /* credentials and the certificate fingerprint are only set for the secured cluster */ + testContext + .withOption( + Elasticsearch8ConnectorOptions.USERNAME_OPTION.key(), + ES_CLUSTER_USERNAME) + .withOption( + Elasticsearch8ConnectorOptions.PASSWORD_OPTION.key(), + ES_CLUSTER_PASSWORD) + .withOption( + Elasticsearch8ConnectorOptions.SSL_CERTIFICATE_FINGERPRINT.key(), + certificateFingerprint); + } + return testContext; + } + + @SuppressWarnings({"unchecked"}) + Map makeGetRequest(String index, String id) + throws ExecutionException, InterruptedException { + return (Map) + client.get(new GetRequest.Builder().index(index).id(id).build(), Map.class) + .get() + .source(); + } + + @SuppressWarnings({"unchecked"}) + List> makeSearchRequest(String index) + throws ExecutionException, InterruptedException { + return client.search(new SearchRequest.Builder().index(index).build(), Map.class).get() + .hits().hits().stream() + .map(hit -> (Map) hit.source()) + .collect(Collectors.toList()); + } + + String getConnectorSql(String index) { + if (secure) { + return String.format("'%s'='%s',\n", "connector", "elasticsearch-8") + + String.format( + "'%s'='%s',\n", Elasticsearch8ConnectorOptions.INDEX_OPTION.key(), index) + + String.format( + "'%s'='%s',\n", + Elasticsearch8ConnectorOptions.HOSTS_OPTION.key(), + "https://" + ES_CONTAINER_SECURE.getHttpHostAddress()) + + String.format( + "'%s'='%s',\n", + Elasticsearch8ConnectorOptions.USERNAME_OPTION.key(), + ES_CLUSTER_USERNAME) + + String.format( + "'%s'='%s',\n", + Elasticsearch8ConnectorOptions.PASSWORD_OPTION.key(), + ES_CLUSTER_PASSWORD) + + String.format( + "'%s'='%s'\n", + Elasticsearch8ConnectorOptions.SSL_CERTIFICATE_FINGERPRINT.key(), + certificateFingerprint); + } else { + return String.format("'%s'='%s',\n", "connector", "elasticsearch-8") + + String.format( + "'%s'='%s',\n", Elasticsearch8ConnectorOptions.INDEX_OPTION.key(), index) + + String.format( + "'%s'='%s'\n", +
Elasticsearch8ConnectorOptions.HOSTS_OPTION.key(), + "http://" + ES_CONTAINER.getHttpHostAddress()); + } + } + + private static ElasticsearchContainer createElasticsearchContainer() { + final ElasticsearchContainer container = + new ElasticsearchContainer(ELASTICSEARCH_IMAGE) + .withEnv("xpack.security.enabled", "false") + .withEnv("ES_JAVA_OPTS", "-Xms2g -Xmx2g") + .withEnv("logger.org.elasticsearch", "ERROR") + .withLogConsumer(new Slf4jLogConsumer(LOG)); + + container.setWaitStrategy( + Wait.defaultWaitStrategy().withStartupTimeout(Duration.ofMinutes(5))); + + return container; + } + + private static ElasticsearchContainer createSecureElasticsearchContainer() { + ElasticsearchContainer container = + new ElasticsearchContainer(ELASTICSEARCH_IMAGE) + .withPassword(ES_CLUSTER_PASSWORD) /* set password */ + .withEnv("ES_JAVA_OPTS", "-Xms2g -Xmx2g") + .withLogConsumer(new Slf4jLogConsumer(LOG)); + + // Set log message based wait strategy as the default wait strategy is not aware of TLS + container + .withEnv("logger.org.elasticsearch", "INFO") + .setWaitStrategy( + new LogMessageWaitStrategy().withRegEx(".*\"message\":\"started.*")); + + return container; + } + + private ElasticsearchAsyncClient createElasticsearchClient() { + return new NetworkConfig( + Collections.singletonList(getHost()), + null, + null, + null, + null, + null, + null, + null, + null, + null) + .createEsClient(); + } + + /** Get Elasticsearch host depending on the parameter secure. */ + protected HttpHost getHost() { + return secure + ?
new HttpHost( + ES_CONTAINER_SECURE.getHost(), + ES_CONTAINER_SECURE.getFirstMappedPort(), + "https") + : new HttpHost(ES_CONTAINER.getHost(), ES_CONTAINER.getFirstMappedPort()); + } + + private ElasticsearchAsyncClient createSecureElasticsearchClient() { + return new NetworkConfig( + Collections.singletonList(getHost()), + ES_CLUSTER_USERNAME, + ES_CLUSTER_PASSWORD, + null, + null, + null, + null, + null, + () -> TransportUtils.sslContextFromCaFingerprint(certificateFingerprint), + null) + .createEsClient(); + } + + /** A mock {@link DynamicTableSink.Context} for Elasticsearch tests: unbounded, with null type information and converters and no target columns. */ + static class MockContext implements DynamicTableSink.Context { + @Override + public boolean isBounded() { + return false; + } + + @Override + public TypeInformation createTypeInformation(DataType consumedDataType) { + return null; + } + + @Override + public TypeInformation createTypeInformation(LogicalType consumedLogicalType) { + return null; + } + + @Override + public DynamicTableSink.DataStructureConverter createDataStructureConverter( + DataType consumedDataType) { + return null; + } + + public Optional getTargetColumns() { + return Optional.empty(); + } + } +} diff --git a/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/table/Elasticsearch8DynamicSinkITCase.java b/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/table/Elasticsearch8DynamicSinkITCase.java new file mode 100644 index 00000000..a2e5fbef --- /dev/null +++ b/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/table/Elasticsearch8DynamicSinkITCase.java @@ -0,0 +1,312 @@ +package org.apache.flink.connector.elasticsearch.table; + +import org.apache.flink.api.common.time.Deadline; +import org.apache.flink.api.connector.sink2.Sink; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.api.EnvironmentSettings;
+import org.apache.flink.table.api.TableEnvironment; +import org.apache.flink.table.api.config.TableConfigOptions; +import org.apache.flink.table.catalog.Column; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.catalog.UniqueConstraint; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.connector.sink.SinkV2Provider; +import org.apache.flink.table.data.GenericRowData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.testutils.junit.extensions.parameterized.ParameterizedTestExtension; +import org.apache.flink.types.RowKind; + +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; + +import java.time.Duration; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; + +import static org.apache.flink.table.api.Expressions.row; +import static org.assertj.core.api.Assertions.assertThat; + +/** IT tests for {@link ElasticSearch8AsyncDynamicSink}. 
*/ +@ExtendWith(ParameterizedTestExtension.class) +class Elasticsearch8DynamicSinkITCase extends Elasticsearch8DynamicSinkBaseITCase { + private static final int PARALLELISM = 4; + + @TestTemplate + public void testWritingDocuments() throws Exception { + ResolvedSchema schema = + new ResolvedSchema( + Arrays.asList( + Column.physical("a", DataTypes.BIGINT().notNull()), + Column.physical("b", DataTypes.TIME()), + Column.physical("c", DataTypes.STRING().notNull()), + Column.physical("d", DataTypes.FLOAT()), + Column.physical("e", DataTypes.TINYINT().notNull()), + Column.physical("f", DataTypes.DATE()), + Column.physical("g", DataTypes.TIMESTAMP().notNull())), + Collections.emptyList(), + UniqueConstraint.primaryKey("name", Arrays.asList("a", "g"))); + GenericRowData rowData = + GenericRowData.of( + 1L, + 12345, + StringData.fromString("ABCDE"), + 12.12f, + (byte) 2, + 12345, + TimestampData.fromLocalDateTime( + LocalDateTime.parse("2012-12-12T12:12:12"))); + + String index = "writing-documents"; + ElasticSearch8AsyncDynamicTableFactory sinkFactory = + new ElasticSearch8AsyncDynamicTableFactory(); + + DynamicTableSink.SinkRuntimeProvider runtimeProvider = + sinkFactory + .createDynamicTableSink( + getPrefilledTestContext(index).withSchema(schema).build()) + .getSinkRuntimeProvider(new MockContext()); + + final SinkV2Provider sinkProvider = (SinkV2Provider) runtimeProvider; + final Sink sink = sinkProvider.createSink(); + StreamExecutionEnvironment environment = + StreamExecutionEnvironment.getExecutionEnvironment(); + + environment.setParallelism(PARALLELISM); + + rowData.setRowKind(RowKind.UPDATE_AFTER); + environment.fromData(rowData).sinkTo(sink); + environment.execute(); + + Map response = makeGetRequest(index, "1_2012-12-12T12:12:12"); /* the id looked up is the values of primary key columns a and g joined by "_" */ + Map expectedMap = new HashMap<>(); + expectedMap.put("a", 1); + expectedMap.put("b", "00:00:12"); + expectedMap.put("c", "ABCDE"); + expectedMap.put("d", 12.12d); + expectedMap.put("e", 2); + expectedMap.put("f", "2003-10-20"); +
expectedMap.put("g", "2012-12-12 12:12:12"); + assertThat(response).isEqualTo(expectedMap); + } + + @TestTemplate + public void testWritingDocumentsFromTableApi() throws Exception { + TableEnvironment tableEnvironment = + TableEnvironment.create(EnvironmentSettings.inStreamingMode()); + + String index = "table-api"; + tableEnvironment.executeSql( + "CREATE TABLE esTable (" + + "a BIGINT NOT NULL,\n" + + "b TIME,\n" + + "c STRING NOT NULL,\n" + + "d FLOAT,\n" + + "e TINYINT NOT NULL,\n" + + "f DATE,\n" + + "g TIMESTAMP NOT NULL,\n" + + "h as a + 2,\n" + + "PRIMARY KEY (a, g) NOT ENFORCED\n" + + ")\n" + + "WITH (\n" + + getConnectorSql(index) + + ")"); + + tableEnvironment + .fromValues( + row( + 1L, + LocalTime.ofNanoOfDay(12345L * 1_000_000L), + "ABCDE", + 12.12f, + (byte) 2, + LocalDate.ofEpochDay(12345), + LocalDateTime.parse("2012-12-12T12:12:12"))) + .executeInsert("esTable") + .await(); + + Map response = makeGetRequest(index, "1_2012-12-12T12:12:12"); + Map expectedMap = new HashMap<>(); /* the computed column h is declared in the table but not expected in the stored document */ + expectedMap.put("a", 1); + expectedMap.put("b", "00:00:12"); + expectedMap.put("c", "ABCDE"); + expectedMap.put("d", 12.12d); + expectedMap.put("e", 2); + expectedMap.put("f", "2003-10-20"); + expectedMap.put("g", "2012-12-12 12:12:12"); + assertThat(response).isEqualTo(expectedMap); + } + + @TestTemplate + public void testWritingDocumentsNoPrimaryKey() throws Exception { + TableEnvironment tableEnvironment = + TableEnvironment.create(EnvironmentSettings.inStreamingMode()); + + String index = "no-primary-key"; + tableEnvironment.executeSql( + "CREATE TABLE esTable (" + + "a BIGINT NOT NULL,\n" + + "b TIME,\n" + + "c STRING NOT NULL,\n" + + "d FLOAT,\n" + + "e TINYINT NOT NULL,\n" + + "f DATE,\n" + + "g TIMESTAMP NOT NULL\n" + + ")\n" + + "WITH (\n" + + getConnectorSql(index) + + ")"); + + tableEnvironment + .fromValues( + row( + 1L, + LocalTime.ofNanoOfDay(12345L * 1_000_000L), + "ABCDE", + 12.12f, + (byte) 2, + LocalDate.ofEpochDay(12345), +
LocalDateTime.parse("2012-12-12T12:12:12")), + row( + 2L, + LocalTime.ofNanoOfDay(12345L * 1_000_000L), + "FGHIJK", + 13.13f, + (byte) 4, + LocalDate.ofEpochDay(12345), + LocalDateTime.parse("2013-12-12T13:13:13"))) + .executeInsert("esTable") + .await(); + + // search API does not return documents that were not indexed, we might need to query + // the index a few times + Deadline deadline = Deadline.fromNow(Duration.ofSeconds(30)); + List> hits; + do { + hits = makeSearchRequest(index); + if (hits.size() < 2) { + Thread.sleep(200); + } + } while (hits.size() < 2 && deadline.hasTimeLeft()); + + if (hits.size() < 2) { + throw new AssertionError("Could not retrieve results from Elasticsearch."); + } + + HashSet> resultSet = new HashSet<>(); /* compared as sets: the order of search hits is not asserted */ + resultSet.add(hits.get(0)); + resultSet.add(hits.get(1)); + Map expectedMap1 = new HashMap<>(); + expectedMap1.put("a", 1); + expectedMap1.put("b", "00:00:12"); + expectedMap1.put("c", "ABCDE"); + expectedMap1.put("d", 12.12d); + expectedMap1.put("e", 2); + expectedMap1.put("f", "2003-10-20"); + expectedMap1.put("g", "2012-12-12 12:12:12"); + Map expectedMap2 = new HashMap<>(); + expectedMap2.put("a", 2); + expectedMap2.put("b", "00:00:12"); + expectedMap2.put("c", "FGHIJK"); + expectedMap2.put("d", 13.13d); + expectedMap2.put("e", 4); + expectedMap2.put("f", "2003-10-20"); + expectedMap2.put("g", "2013-12-12 13:13:13"); + HashSet> expectedSet = new HashSet<>(); + expectedSet.add(expectedMap1); + expectedSet.add(expectedMap2); + assertThat(resultSet).isEqualTo(expectedSet); + } + + @TestTemplate + public void testWritingDocumentsWithDynamicIndex() throws Exception { + TableEnvironment tableEnvironment = + TableEnvironment.create(EnvironmentSettings.inStreamingMode()); + + String index = "dynamic-index-{b|yyyy-MM-dd}"; + tableEnvironment.executeSql( + "CREATE TABLE esTable (" + + "a BIGINT NOT NULL,\n" + + "b TIMESTAMP NOT NULL,\n" + + "PRIMARY KEY (a) NOT ENFORCED\n" + + ")\n" + + "WITH (\n" + + getConnectorSql(index) + + ")");
+ + tableEnvironment + .fromValues(row(1L, LocalDateTime.parse("2012-12-12T12:12:12"))) + .executeInsert("esTable") + .await(); + + Map response = makeGetRequest("dynamic-index-2012-12-12", "1"); /* the index name is the pattern with column b formatted as yyyy-MM-dd */ + Map expectedMap = new HashMap<>(); + expectedMap.put("a", 1); + expectedMap.put("b", "2012-12-12 12:12:12"); + assertThat(response).isEqualTo(expectedMap); + } + + @TestTemplate + public void testWritingDocumentsWithDynamicIndexFromSystemTime() throws Exception { + TableEnvironment tableEnvironment = + TableEnvironment.create(EnvironmentSettings.inStreamingMode()); + + DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); + tableEnvironment.getConfig().set(TableConfigOptions.LOCAL_TIME_ZONE, "Asia/Shanghai"); + + String dynamicIndex1 = + "dynamic-index-" + + dateTimeFormatter.format(LocalDateTime.now(ZoneId.of("Asia/Shanghai"))) + + "_index"; + + String index = "dynamic-index-{now()|yyyy-MM-dd}_index"; + + tableEnvironment.executeSql( + "CREATE TABLE esTable (" + + "a BIGINT NOT NULL,\n" + + "b TIMESTAMP NOT NULL,\n" + + "PRIMARY KEY (a) NOT ENFORCED\n" + + ")\n" + + "WITH (\n" + + getConnectorSql(index) + + ")"); + + String dynamicIndex2 = + "dynamic-index-" + + dateTimeFormatter.format(LocalDateTime.now(ZoneId.of("Asia/Shanghai"))) + + "_index"; /* second candidate name taken after table creation; presumably tolerates a date rollover while the test runs */ + + tableEnvironment + .fromValues(row(1L, LocalDateTime.parse("2012-12-12T12:12:12"))) + .executeInsert("esTable") + .await(); + + Map response; + try { + response = makeGetRequest(dynamicIndex1, "1"); + } catch (Exception e) { + if (e.getMessage().contains("index_not_found_exception")) { + response = makeGetRequest(dynamicIndex2, "1"); + } else { + throw e; + } + } + + Map expectedMap = new HashMap<>(); + expectedMap.put("a", 1); + + expectedMap.put("b", "2012-12-12 12:12:12"); + assertThat(response).isEqualTo(expectedMap); + } +} diff --git a/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/table/IndexGeneratorTest.java
b/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/table/IndexGeneratorTest.java new file mode 100644 index 00000000..00dc3ae8 --- /dev/null +++ b/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/table/IndexGeneratorTest.java @@ -0,0 +1,352 @@ +package org.apache.flink.connector.elasticsearch.table; + +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.api.TableException; +import org.apache.flink.table.data.GenericRowData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.StringData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.table.types.DataType; + +import org.junit.jupiter.api.Test; + +import java.sql.Date; +import java.sql.Time; +import java.sql.Timestamp; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.util.Arrays; +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy; +import static org.junit.jupiter.api.Assumptions.assumingThat; + +/** Suite tests for {@link IndexGenerator}. 
*/ +public class IndexGeneratorTest { + + private static final List fieldNames = + Arrays.asList( + "id", + "item", + "log_ts", + "log_date", + "order_timestamp", + "log_time", + "local_datetime", + "local_date", + "local_time", + "local_timestamp", + "note", + "status"); + + private static final List dataTypes = + Arrays.asList( + DataTypes.INT(), + DataTypes.STRING(), + DataTypes.BIGINT(), + DataTypes.DATE().bridgedTo(Date.class), + DataTypes.TIMESTAMP().bridgedTo(Timestamp.class), + DataTypes.TIME().bridgedTo(Time.class), + DataTypes.TIMESTAMP().bridgedTo(LocalDateTime.class), + DataTypes.DATE().bridgedTo(LocalDate.class), + DataTypes.TIME().bridgedTo(LocalTime.class), + DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(), + DataTypes.STRING(), + DataTypes.BOOLEAN()); + + private static final List rows = + Arrays.asList( + GenericRowData.of( + 1, + StringData.fromString("apple"), + Timestamp.valueOf("2020-03-18 12:12:14").getTime(), + (int) Date.valueOf("2020-03-18").toLocalDate().toEpochDay(), + TimestampData.fromTimestamp(Timestamp.valueOf("2020-03-18 12:12:14")), + (int) + (Time.valueOf("12:12:14").toLocalTime().toNanoOfDay() + / 1_000_000L), + TimestampData.fromLocalDateTime( + LocalDateTime.of(2020, 3, 18, 12, 12, 14, 1000)), + (int) LocalDate.of(2020, 3, 18).toEpochDay(), + (int) (LocalTime.of(12, 13, 14, 2000).toNanoOfDay() / 1_000_000L), + TimestampData.fromInstant( + LocalDateTime.of(2020, 3, 18, 3, 12, 14, 1000) + .atZone(ZoneId.of("Asia/Shanghai")) + .toInstant()), + StringData.fromString("test1"), /* STRING columns hold StringData internally, like the item column */ + true), + GenericRowData.of( + 2, + StringData.fromString("peanut"), + Timestamp.valueOf("2020-03-19 12:22:14").getTime(), + (int) Date.valueOf("2020-03-19").toLocalDate().toEpochDay(), + TimestampData.fromTimestamp(Timestamp.valueOf("2020-03-19 12:22:21")), + (int) + (Time.valueOf("12:22:21").toLocalTime().toNanoOfDay() + / 1_000_000L), + TimestampData.fromLocalDateTime( + LocalDateTime.of(2020, 3, 19, 12, 22, 14, 1000)), + (int) LocalDate.of(2020, 3, 19).toEpochDay(), + (int)
(LocalTime.of(12, 13, 14, 2000).toNanoOfDay() / 1_000_000L), + TimestampData.fromInstant( + LocalDateTime.of(2020, 3, 19, 20, 22, 14, 1000) + .atZone(ZoneId.of("America/Los_Angeles")) + .toInstant()), + StringData.fromString("test2"), + false)); + + @Test + public void testDynamicIndexFromTimestampTzUTC() { + assumingThat( + ZoneId.systemDefault().equals(ZoneId.of("UTC")), + () -> { + IndexGenerator indexGenerator = + IndexGeneratorFactory.createIndexGenerator( + "{local_timestamp|yyyy_MM_dd_HH-ss}_index", + fieldNames, + dataTypes); + indexGenerator.open(); + assertThat(indexGenerator.generate(rows.get(0))) + .isEqualTo("2020_03_17_19-14_index"); + assertThat(indexGenerator.generate(rows.get(1))) + .isEqualTo("2020_03_20_03-14_index"); + }); + } + + @Test + public void testDynamicIndexFromTimestampTzWithSpecificTimezone() { + IndexGenerator indexGenerator = + IndexGeneratorFactory.createIndexGenerator( + "{local_timestamp|yyyy_MM_dd_HH-ss}_index", + fieldNames, + dataTypes, + ZoneId.of("Europe/Berlin")); + indexGenerator.open(); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("2020_03_17_20-14_index"); + assertThat(indexGenerator.generate(rows.get(1))).isEqualTo("2020_03_20_04-14_index"); + } + + @Test + public void testDynamicIndexFromTimestamp() { + IndexGenerator indexGenerator = + IndexGeneratorFactory.createIndexGenerator( + "{order_timestamp|yyyy_MM_dd_HH-ss}_index", fieldNames, dataTypes); + indexGenerator.open(); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("2020_03_18_12-14_index"); + IndexGenerator indexGenerator1 = + IndexGeneratorFactory.createIndexGenerator( + "{order_timestamp|yyyy_MM_dd_HH_mm}_index", fieldNames, dataTypes); + indexGenerator1.open(); + assertThat(indexGenerator1.generate(rows.get(1))).isEqualTo("2020_03_19_12_22_index"); + } + + @Test + public void testDynamicIndexFromLocalDateTime() { + IndexGenerator indexGenerator = + IndexGeneratorFactory.createIndexGenerator( + "{local_datetime|yyyy_MM_dd_HH-ss}_index", fieldNames,
dataTypes); + indexGenerator.open(); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("2020_03_18_12-14_index"); + IndexGenerator indexGenerator1 = + IndexGeneratorFactory.createIndexGenerator( + "{local_datetime|yyyy_MM_dd_HH_mm}_index", fieldNames, dataTypes); + indexGenerator1.open(); + assertThat(indexGenerator1.generate(rows.get(1))).isEqualTo("2020_03_19_12_22_index"); + } + + @Test + public void testDynamicIndexFromDate() { + IndexGenerator indexGenerator = + IndexGeneratorFactory.createIndexGenerator( + "my-index-{log_date|yyyy/MM/dd}", fieldNames, dataTypes); + indexGenerator.open(); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("my-index-2020/03/18"); + assertThat(indexGenerator.generate(rows.get(1))).isEqualTo("my-index-2020/03/19"); + } + + @Test + public void testDynamicIndexFromLocalDate() { + IndexGenerator indexGenerator = + IndexGeneratorFactory.createIndexGenerator( + "my-index-{local_date|yyyy/MM/dd}", fieldNames, dataTypes); + indexGenerator.open(); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("my-index-2020/03/18"); + assertThat(indexGenerator.generate(rows.get(1))).isEqualTo("my-index-2020/03/19"); + } + + @Test + public void testDynamicIndexFromTime() { + IndexGenerator indexGenerator = + IndexGeneratorFactory.createIndexGenerator( + "my-index-{log_time|HH-mm}", fieldNames, dataTypes); + indexGenerator.open(); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("my-index-12-12"); + assertThat(indexGenerator.generate(rows.get(1))).isEqualTo("my-index-12-22"); + } + + @Test + public void testDynamicIndexFromLocalTime() { + IndexGenerator indexGenerator = + IndexGeneratorFactory.createIndexGenerator( + "my-index-{local_time|HH-mm}", fieldNames, dataTypes); + indexGenerator.open(); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("my-index-12-13"); + assertThat(indexGenerator.generate(rows.get(1))).isEqualTo("my-index-12-13"); + } + + @Test + public void
testDynamicIndexDefaultFormat() { + IndexGenerator indexGenerator = + IndexGeneratorFactory.createIndexGenerator( + "my-index-{local_time|}", fieldNames, dataTypes); + indexGenerator.open(); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("my-index-12_13_14"); + assertThat(indexGenerator.generate(rows.get(1))).isEqualTo("my-index-12_13_14"); + } + + @Test + public void testDynamicIndexFromSystemTime() { + List supportedUseCases = + Arrays.asList( + "now()", + "NOW()", + "now( )", + "NOW(\t)", + "\t NOW( ) \t", + "current_timestamp", + "CURRENT_TIMESTAMP", + "\tcurrent_timestamp\t", + " current_timestamp "); + + supportedUseCases.stream() + .forEach( + f -> { + DateTimeFormatter dateTimeFormatter = + DateTimeFormatter.ofPattern("yyyy_MM_dd"); + IndexGenerator indexGenerator = + IndexGeneratorFactory.createIndexGenerator( + String.format("my-index-{%s|yyyy_MM_dd}", f), + fieldNames, + dataTypes); + indexGenerator.open(); + // The date may change during the running of the unit test. + // Generate expected index-name based on the current time + // before and after calling the generate method.
+ String expectedIndex1 = + "my-index-" + LocalDateTime.now().format(dateTimeFormatter); + String actualIndex = indexGenerator.generate(rows.get(1)); + String expectedIndex2 = + "my-index-" + LocalDateTime.now().format(dateTimeFormatter); + assertThat( + actualIndex.equals(expectedIndex1) + || actualIndex.equals(expectedIndex2)) + .isTrue(); + }); + + List invalidUseCases = + Arrays.asList( + "now", + "now(", + "NOW", + "NOW)", + "current_timestamp()", + "CURRENT_TIMESTAMP()", + "CURRENT_timestamp"); + invalidUseCases.stream() + .forEach( + f -> { + String expectedExceptionMsg = + String.format( + "Unknown field '%s' in index pattern 'my-index-{%s|yyyy_MM_dd}'," + + " please check the field name.", + f, f); + // assertThatThrownBy fails when nothing is thrown; a bare try/catch would pass. + assertThatThrownBy( + () -> + IndexGeneratorFactory.createIndexGenerator( + String.format("my-index-{%s|yyyy_MM_dd}", f), + fieldNames, + dataTypes) + .open()) + .isInstanceOf(TableException.class) + .hasMessage(expectedExceptionMsg); + }); + } + + @Test + public void testGeneralDynamicIndex() { + IndexGenerator indexGenerator = + IndexGeneratorFactory.createIndexGenerator("index_{item}", fieldNames, dataTypes); + indexGenerator.open(); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("index_apple"); + assertThat(indexGenerator.generate(rows.get(1))).isEqualTo("index_peanut"); + } + + @Test + public void testStaticIndex() { + IndexGenerator indexGenerator = + IndexGeneratorFactory.createIndexGenerator("my-index", fieldNames, dataTypes); + indexGenerator.open(); + assertThat(indexGenerator.generate(rows.get(0))).isEqualTo("my-index"); + assertThat(indexGenerator.generate(rows.get(1))).isEqualTo("my-index"); + } + + @Test + public void testUnknownField() { + String expectedExceptionMsg = + "Unknown field 'unknown_ts' in index pattern 'my-index-{unknown_ts|yyyy-MM-dd}'," + + " please check the field name."; + assertThatThrownBy( + () -> + IndexGeneratorFactory.createIndexGenerator( +
"my-index-{unknown_ts|yyyy-MM-dd}", fieldNames, dataTypes)) + .isInstanceOf(TableException.class) + .hasMessage(expectedExceptionMsg); + } + + @Test + public void testUnsupportedTimeType() { + String expectedExceptionMsg = + "Unsupported type 'INT' found in Elasticsearch dynamic index field, " + + "time-related pattern only support types are: DATE,TIME,TIMESTAMP."; + assertThatThrownBy( + () -> + IndexGeneratorFactory.createIndexGenerator( + "my-index-{id|yyyy-MM-dd}", fieldNames, dataTypes)) + .isInstanceOf(TableException.class) + .hasMessage(expectedExceptionMsg); + } + + @Test + public void testUnsupportedMultiParametersType() { + String expectedExceptionMsg = + "Chaining dynamic index pattern my-index-{local_date}-{local_time} is not supported," + + " only support single dynamic index pattern."; + assertThatThrownBy( + () -> + IndexGeneratorFactory.createIndexGenerator( + "my-index-{local_date}-{local_time}", + fieldNames, + dataTypes)) + .isInstanceOf(TableException.class) + .hasMessage(expectedExceptionMsg); + } + + @Test + public void testUnsupportedIndexFieldType() { + String expectedExceptionMsg = + "Unsupported type BOOLEAN of index field, Supported types are:" + + " [DATE, TIME_WITHOUT_TIME_ZONE, TIMESTAMP_WITHOUT_TIME_ZONE, TIMESTAMP_WITH_TIME_ZONE," + + " TIMESTAMP_WITH_LOCAL_TIME_ZONE, VARCHAR, CHAR, TINYINT, INTEGER, BIGINT]"; + assertThatThrownBy( + () -> + IndexGeneratorFactory.createIndexGenerator( + "index_{status}", fieldNames, dataTypes)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage(expectedExceptionMsg); + } +} diff --git a/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/table/KeyExtractorTest.java b/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/table/KeyExtractorTest.java new file mode 100644 index 00000000..8d02db3f --- /dev/null +++ b/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/table/KeyExtractorTest.java
@@ -0,0 +1,144 @@
+package org.apache.flink.connector.elasticsearch.table;
+
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.data.GenericRowData;
+import org.apache.flink.table.data.RowData;
+import org.apache.flink.table.data.StringData;
+import org.apache.flink.table.data.TimestampData;
+
+import org.junit.jupiter.api.Test;
+
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.util.Collections;
+import java.util.List;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Tests for {@link KeyExtractor}.
+ *
+ * <p>Every case creates the extractor with {@code "_"} as the delimiter and applies it to a
+ * {@link GenericRowData} row.
+ */
+public class KeyExtractorTest {
+    /** A single BIGINT key column yields its decimal text; the non-key column is ignored. */
+    @Test
+    public void testSimpleKey() {
+        List<LogicalTypeWithIndex> logicalTypesWithIndex =
+                Stream.of(
+                                new LogicalTypeWithIndex(
+                                        0, DataTypes.BIGINT().notNull().getLogicalType()))
+                        .collect(Collectors.toList());
+
+        Function<RowData, String> keyExtractor =
+                KeyExtractor.createKeyExtractor(logicalTypesWithIndex, "_");
+
+        String key = keyExtractor.apply(GenericRowData.of(12L, StringData.fromString("ABCD")));
+        assertThat(key).isEqualTo("12");
+    }
+
+    /** With an empty list of key columns the extractor is expected to return {@code null}. */
+    @Test
+    public void testNoPrimaryKey() {
+        List<LogicalTypeWithIndex> logicalTypesWithIndex = Collections.emptyList();
+
+        Function<RowData, String> keyExtractor =
+                KeyExtractor.createKeyExtractor(logicalTypesWithIndex, "_");
+
+        String key = keyExtractor.apply(GenericRowData.of(12L, StringData.fromString("ABCD")));
+        assertThat(key).isNull();
+    }
+
+    /** Columns 0 and 2 form the key, joined by the delimiter; column 1 is left out. */
+    @Test
+    public void testTwoFieldsKey() {
+        List<LogicalTypeWithIndex> logicalTypesWithIndex =
+                Stream.of(
+                                new LogicalTypeWithIndex(
+                                        0, DataTypes.BIGINT().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        2, DataTypes.TIMESTAMP().notNull().getLogicalType()))
+                        .collect(Collectors.toList());
+
+        Function<RowData, String> keyExtractor =
+                KeyExtractor.createKeyExtractor(logicalTypesWithIndex, "_");
+
+        String key =
+                keyExtractor.apply(
+                        GenericRowData.of(
+                                12L,
+                                StringData.fromString("ABCD"),
+                                TimestampData.fromLocalDateTime(
+                                        LocalDateTime.parse("2012-12-12T12:12:12"))));
+        assertThat(key).isEqualTo("12_2012-12-12T12:12:12");
+    }
+
+    /**
+     * Builds a key from twelve columns of different logical types and checks the text produced
+     * for each of them.
+     */
+    @Test
+    public void testAllTypesKey() {
+        List<LogicalTypeWithIndex> logicalTypesWithIndex =
+                Stream.of(
+                                new LogicalTypeWithIndex(
+                                        0, DataTypes.TINYINT().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        1, DataTypes.SMALLINT().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        2, DataTypes.INT().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        3, DataTypes.BIGINT().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        4, DataTypes.BOOLEAN().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        5, DataTypes.FLOAT().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        6, DataTypes.DOUBLE().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        7, DataTypes.STRING().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        8, DataTypes.TIMESTAMP().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        9,
+                                        DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE()
+                                                .notNull()
+                                                .getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        10, DataTypes.TIME().notNull().getLogicalType()),
+                                new LogicalTypeWithIndex(
+                                        11, DataTypes.DATE().notNull().getLogicalType()))
+                        .collect(Collectors.toList());
+
+        Function<RowData, String> keyExtractor =
+                KeyExtractor.createKeyExtractor(logicalTypesWithIndex, "_");
+
+        String key =
+                keyExtractor.apply(
+                        GenericRowData.of(
+                                (byte) 1,
+                                (short) 2,
+                                3,
+                                4L,
+                                true,
+                                1.0f,
+                                2.0d,
+                                StringData.fromString("ABCD"),
+                                TimestampData.fromLocalDateTime(
+                                        LocalDateTime.parse("2012-12-12T12:12:12")),
+                                TimestampData.fromInstant(Instant.parse("2013-01-13T13:13:13Z")),
+                                // The TIME column is fed as milliseconds of the day.
+                                (int) (LocalTime.parse("14:14:14").toNanoOfDay() / 1_000_000),
+                                // The DATE column is fed as days since the epoch.
+                                (int) LocalDate.parse("2015-05-15").toEpochDay()));
+        assertThat(key)
+                .isEqualTo(
+                        "1_2_3_4_true_1.0_2.0_ABCD_2012-12-12T12:12:12_2013-01-13T13:13:13_14:14:14_2015-05-15");
+    }
+}
diff --git
a/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/table/TestContext.java b/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/table/TestContext.java
new file mode 100644
index 00000000..4c225b3d
--- /dev/null
+++ b/flink-connector-elasticsearch8/src/test/java/org/apache/flink/connector/elasticsearch/table/TestContext.java
@@ -0,0 +1,82 @@
+package org.apache.flink.connector.elasticsearch.table;
+
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.api.Schema;
+import org.apache.flink.table.catalog.CatalogTable;
+import org.apache.flink.table.catalog.Column;
+import org.apache.flink.table.catalog.ObjectIdentifier;
+import org.apache.flink.table.catalog.ResolvedCatalogTable;
+import org.apache.flink.table.catalog.ResolvedSchema;
+import org.apache.flink.table.factories.DynamicTableFactory;
+import org.apache.flink.table.factories.FactoryUtil;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A utility class for mocking {@link DynamicTableFactory.Context}.
+ *
+ * <p>Obtain an instance with {@link #context()}, adjust it with {@link #withSchema} and
+ * {@link #withOption}, then call {@link #build()}.
+ */
+class TestContext {
+
+    /** Schema of the mocked table; a single physical TIME column {@code a} unless replaced. */
+    private ResolvedSchema schema = ResolvedSchema.of(Column.physical("a", DataTypes.TIME()));
+
+    /** Table options added through {@link #withOption}. */
+    private final Map<String, String> options = new HashMap<>();
+
+    /** Returns a new instance with the default schema and no options. */
+    public static TestContext context() {
+        return new TestContext();
+    }
+
+    /**
+     * Replaces the schema of the mocked table.
+     *
+     * @param schema schema to use instead of the default one
+     * @return this instance, for chaining
+     */
+    public TestContext withSchema(ResolvedSchema schema) {
+        this.schema = schema;
+        return this;
+    }
+
+    /**
+     * Creates the context for the object identifier built from {@code "default"},
+     * {@code "default"} and {@code "t1"}, using the current schema and options.
+     *
+     * @return a {@link FactoryUtil.DefaultDynamicTableContext} wrapping the mocked table
+     */
+    DynamicTableFactory.Context build() {
+        return new FactoryUtil.DefaultDynamicTableContext(
+                ObjectIdentifier.of("default", "default", "t1"),
+                new ResolvedCatalogTable(
+                        CatalogTable.newBuilder()
+                                .schema(Schema.newBuilder().fromResolvedSchema(schema).build())
+                                .comment("mock context")
+                                .partitionKeys(Collections.emptyList())
+                                .options(options)
+                                .build(),
+                        schema),
+                Collections.emptyMap(),
+                new Configuration(),
+                TestContext.class.getClassLoader(),
+                false);
+    }
+
+    /**
+     * Adds a table option, overwriting any value previously set for the same key.
+     *
+     * @param key option key
+     * @param value option value
+     * @return this instance, for chaining
+     */
+    public TestContext withOption(String key, String value) {
+        options.put(key, value);
+        return this;
+    }
+}