Skip to content

Commit d929555

Browse files
authored
Merge pull request #35777 from vespa-engine/hmusum/throw-IllegalArgumentException-when-getting-unknown-language-tag
Throw IllegalArgumentException when unknown language tag is used in set_language
2 parents 51767cf + eca3c90 commit d929555

3 files changed

Lines changed: 38 additions & 8 deletions

File tree

indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/SetLanguageTestCase.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
import static com.yahoo.vespa.indexinglanguage.expressions.ExpressionAssert.assertVerify;
1111
import static com.yahoo.vespa.indexinglanguage.expressions.ExpressionAssert.assertVerifyThrows;
1212
import static org.junit.Assert.assertEquals;
13-
import static org.junit.Assert.assertFalse;
1413
import static org.junit.Assert.assertNotEquals;
14+
import static org.junit.Assert.assertThrows;
1515

1616
/**
1717
* @author Simon Thoresen Hult
@@ -57,4 +57,11 @@ public void testSettingMissingValue() {
5757
assertEquals(Language.UNKNOWN, ctx.getLanguage());
5858
}
5959

60+
@Test
61+
public void testSettingIllegalValue() {
62+
ExecutionContext ctx = new ExecutionContext(new SimpleTestAdapter());
63+
ctx.setCurrentValue(new StringFieldValue("this is part of a long string which is mistakenly used as language"));
64+
assertThrows(IllegalArgumentException.class, () -> new SetLanguageExpression().execute(ctx));
65+
}
66+
6067
}

linguistics/src/main/java/com/yahoo/language/LocaleFactory.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
22
package com.yahoo.language;
33

4+
import com.yahoo.text.Text;
5+
6+
import java.util.IllformedLocaleException;
47
import java.util.Locale;
58
import java.util.Objects;
69

@@ -21,6 +24,15 @@ private LocaleFactory() {}
2124
*/
2225
public static Locale fromLanguageTag(String tag) {
2326
Objects.requireNonNull(tag, "tag cannot be null");
27+
// Tags can have variants and extension making them arbitrarily long,
28+
// but tags exceeding 35-40 characters are extremely rare in production systems.
29+
// Truncate in case people mistakenly use large text fields as tags
30+
var truncatedTag = Text.truncate(tag, 45);
31+
// truncate adds whitespace, so check original tag for whitespace
32+
if (tag.contains(" ")) {
33+
throw new IllegalArgumentException("Illegal language tag '" + truncatedTag + "', language tags corresponding to RFC5646 cannot contain whitespace");
34+
}
35+
tag = truncatedTag;
2436

2537
tag = tag.trim();
2638
if (tag.isEmpty()) return UNKNOWN;
@@ -45,7 +57,11 @@ public static Locale fromLanguageTag(String tag) {
4557
}
4658
}
4759
if (language.isEmpty()) return UNKNOWN;
48-
return new Locale.Builder().setLanguage(language).setScript(script).setRegion(region).build();
60+
try {
61+
return new Locale.Builder().setLanguage(language).setScript(script).setRegion(region).build();
62+
} catch (IllformedLocaleException e) {
63+
throw new IllegalArgumentException("Illegal language tag '" + tag + "', it must be a language tag corresponding to RFC5646");
64+
}
4965
}
5066

5167
}

linguistics/src/test/java/com/yahoo/language/LocaleFactoryTestCase.java

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import java.util.Locale;
77

88
import static org.junit.Assert.assertEquals;
9+
import static org.junit.Assert.assertThrows;
910
import static org.junit.Assert.assertTrue;
1011
import static org.junit.Assert.fail;
1112

@@ -31,16 +32,22 @@ public void requireThatLocaleCanBeCreatedFromLanguageTag() {
3132
assertLocale("es", "es", "", "");
3233
assertLocale("es-419", "es", "", "419");
3334

34-
try {
35-
LocaleFactory.fromLanguageTag(null);
36-
fail();
37-
} catch (NullPointerException e) {
38-
39-
}
35+
assertThrows(NullPointerException.class, () ->LocaleFactory.fromLanguageTag(null));
4036

4137
assertLocale("", "", "", "");
4238
assertLocale("z-foo", "", "", "");
4339
assertLocale("ojeroierhoiherohjdadsfodsfoifiopeoipefwoipfwe", "", "", "");
40+
41+
var exception = assertThrows(IllegalArgumentException.class,
42+
() -> LocaleFactory.fromLanguageTag("foo-13-ojeroierhoiherohjdadsfodsfoifiopeoipefwoipfwe"));
43+
assertEquals("Illegal language tag 'foo-13-ojeroierhoiherohjdadsfodsfoifiopeo ...', it must be a language tag corresponding to RFC5646",
44+
exception.getMessage());
45+
46+
var languageTag = "this is part of a long string which should be cut off at 20 characters";
47+
exception = assertThrows(IllegalArgumentException.class,
48+
() -> LocaleFactory.fromLanguageTag(languageTag));
49+
assertEquals("Illegal language tag 'this is part of a long string which shoul ...', language tags corresponding to RFC5646 cannot contain whitespace",
50+
exception.getMessage());
4451
}
4552

4653
private static void assertLocale(String tag, String language, String variant, String country) {

0 commit comments

Comments
 (0)