Skip to content

Update Text class to use native java ByteBuffer #127666

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 8, 2025
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,17 @@
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.common.text;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.XContentBuilder;
package org.elasticsearch.xcontent;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

/**
* Both {@link String} and {@link BytesReference} representation of the text. Starts with one of those, and if
* Both {@link String} and {@link ByteBuffer} representation of the text. Starts with one of those, and if
* the other is requests, caches the other one in a local reference so no additional conversion will be needed.
*/
public final class Text implements Comparable<Text>, ToXContentFragment {
public final class Text implements XContentString, Comparable<Text>, ToXContentFragment {

public static final Text[] EMPTY_ARRAY = new Text[0];

Expand All @@ -36,31 +31,43 @@ public static Text[] convertFromStringArray(String[] strings) {
return texts;
}

private BytesReference bytes;
private ByteBuffer bytes;
private String text;
private int hash;
private int stringLength = -1;

/**
* Construct a Text from a UTF-8 encoded ByteBuffer. Since no string length is specified, {@link #stringLength()}
* will perform a string conversion to measure the string length.
*/
public Text(ByteBuffer bytes) {
this.bytes = bytes;
}

public Text(BytesReference bytes) {
/**
* Construct a Text from a UTF-8 encoded ByteBuffer and an explicit string length. Used to avoid string conversion
* in {@link #stringLength()}.
*/
public Text(ByteBuffer bytes, int stringLength) {
this.bytes = bytes;
this.stringLength = stringLength;
}

public Text(String text) {
this.text = text;
}

/**
* Whether a {@link BytesReference} view of the data is already materialized.
* Whether a {@link ByteBuffer} view of the data is already materialized.
*/
public boolean hasBytes() {
return bytes != null;
}

/**
* Returns a {@link BytesReference} view of the data.
*/
public BytesReference bytes() {
@Override
public ByteBuffer bytes() {
if (bytes == null) {
bytes = new BytesArray(text.getBytes(StandardCharsets.UTF_8));
bytes = StandardCharsets.UTF_8.encode(text);
}
return bytes;
}
Expand All @@ -72,11 +79,20 @@ public boolean hasString() {
return text != null;
}

/**
* Returns a {@link String} view of the data.
*/
@Override
public String string() {
return text == null ? bytes.utf8ToString() : text;
if (text == null) {
text = StandardCharsets.UTF_8.decode(bytes).toString();
}
return text;
}

@Override
public int stringLength() {
if (stringLength < 0) {
stringLength = string().length();
}
return stringLength;
}

@Override
Expand Down Expand Up @@ -115,8 +131,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
} else {
// TODO: TextBytesOptimization we can use a buffer here to convert it? maybe add a
// request to jackson to support InputStream as well?
BytesRef br = this.bytes().toBytesRef();
return builder.utf8Value(br.bytes, br.offset, br.length);
assert bytes.hasArray();
return builder.utf8Value(bytes.array(), bytes.arrayOffset() + bytes.position(), bytes.remaining());
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.xcontent;

import java.nio.ByteBuffer;

public interface XContentString {
/**
* Returns a {@link String} view of the data.
*/
String string();

/**
* Returns a UTF8-encoded {@link ByteBuffer} view of the data.
*/
ByteBuffer bytes();

/**
* Returns the number of characters in the represented string.
*/
int stringLength();
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.util.concurrent.CountDown;
Expand All @@ -56,6 +55,7 @@
import org.elasticsearch.tasks.TaskManager;
import org.elasticsearch.threadpool.Scheduler;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.xcontent.Text;

import java.util.ArrayList;
import java.util.Collections;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.xcontent.Text;

import java.io.IOException;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.settings.SecureString;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.CharArrays;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.xcontent.Text;

import java.io.EOFException;
import java.io.FilterInputStream;
Expand Down Expand Up @@ -391,13 +391,23 @@ public Text readOptionalText() throws IOException {
if (length == -1) {
return null;
}
return new Text(readBytesReference(length));
byte[] bytes = new byte[length];
if (length > 0) {
readBytes(bytes, 0, length);
}
var byteBuff = ByteBuffer.wrap(bytes);
return new Text(byteBuff);
}

public Text readText() throws IOException {
// use StringAndBytes so we can cache the string if it's ever converted to it
// use Text so we can cache the string if it's ever converted to it
int length = readInt();
return new Text(readBytesReference(length));
byte[] bytes = new byte[length];
if (length > 0) {
readBytes(bytes, 0, length);
}
var byteBuff = ByteBuffer.wrap(bytes);
return new Text(byteBuff);
}

@Nullable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.io.stream.Writeable.Writer;
import org.elasticsearch.common.settings.SecureString;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.ByteUtils;
import org.elasticsearch.core.CharArrays;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.XContentType;

import java.io.IOException;
Expand Down Expand Up @@ -419,7 +419,7 @@ public void writeText(Text text) throws IOException {
writeInt(spare.length());
write(spare.bytes(), 0, spare.length());
} else {
BytesReference bytes = text.bytes();
BytesReference bytes = BytesReference.fromByteBuffer(text.bytes());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we could support reading/writing ByteBuffer directly in StreamInput/Output so that conversions are not necessary here. That could allow optimizing in the ByteArrayStreamInput case to create a ByteBuffer that wraps the underlying byte array with appropriate offset/length, without needing to copy bytes to a new array.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll look into that, thanks!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I'm thinking this might be a bigger change than we want to incorporate into this PR. It might be better to leave it as a follow-up, since the current implementation will still work, it'll just be less optimized.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm OK with leaving this in a follow-up, it will help with reviews too

writeInt(bytes.length());
bytes.writeTo(this);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.xcontent.ChunkedToXContent;
import org.elasticsearch.common.xcontent.XContentHelper;
Expand All @@ -39,6 +38,7 @@
import org.elasticsearch.search.lookup.Source;
import org.elasticsearch.transport.LeakTracker;
import org.elasticsearch.transport.RemoteClusterAware;
import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.transport.RemoteClusterAware;
import org.elasticsearch.xcontent.Text;

import java.io.IOException;
import java.util.Objects;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import org.elasticsearch.common.CheckedSupplier;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
Expand All @@ -34,6 +33,7 @@
import org.elasticsearch.lucene.search.uhighlight.Snippet;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.xcontent.Text;

import java.io.IOException;
import java.text.BreakIterator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import org.apache.lucene.search.vectorhighlight.SingleFragListBuilder;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.TextSearchInfo;
Expand All @@ -33,6 +32,7 @@
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.FieldOptions;
import org.elasticsearch.search.lookup.Source;
import org.elasticsearch.xcontent.Text;

import java.io.IOException;
import java.text.BreakIterator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.XContentBuilder;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.util.BytesRefHash;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.xcontent.Text;

import java.io.IOException;
import java.util.ArrayList;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.sort.SortAndFormats;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.rest.action.search.RestSearchAction;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.XContentBuilder;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
import org.apache.lucene.search.suggest.document.TopSuggestDocs;
import org.apache.lucene.search.suggest.document.TopSuggestDocsCollector;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.mapper.CompletionFieldMapper;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.Suggester;
import org.elasticsearch.xcontent.Text;

import java.io.IOException;
import java.util.Collections;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRefBuilder;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.query.AbstractQueryBuilder;
import org.elasticsearch.index.query.ParsedQuery;
import org.elasticsearch.index.query.QueryBuilder;
Expand All @@ -31,6 +30,7 @@
import org.elasticsearch.search.suggest.Suggester;
import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext;
import org.elasticsearch.search.suggest.phrase.NoisyChannelSpellChecker.Result;
import org.elasticsearch.xcontent.Text;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentParser;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@

import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.Suggest.Suggestion;
import org.elasticsearch.xcontent.Text;

import java.io.IOException;
import java.util.Objects;
Expand Down
Loading