Skip to content

Commit 90af867

Browse files
Modify default encoding detector
Replace HtmlEncodingDetector with StandardHtmlEncodingDetector. Adjust some test cases.
1 parent fd6a5f5 commit 90af867

File tree

3 files changed

+5
-4
lines changed

3 files changed

+5
-4
lines changed
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
org.apache.tika.parser.html.HtmlEncodingDetector
1+
org.apache.tika.parser.html.charsetdetector.StandardHtmlEncodingDetector

tika-parsers/tika-parser-integration-tests/src/test/java/org/apache/tika/config/TikaEncodingDetectorTest.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@
4242
import org.apache.tika.parser.Parser;
4343
import org.apache.tika.parser.ParserDecorator;
4444
import org.apache.tika.parser.html.HtmlEncodingDetector;
45+
import org.apache.tika.parser.html.charsetdetector.StandardHtmlEncodingDetector;
4546
import org.apache.tika.parser.txt.Icu4jEncodingDetector;
4647
import org.apache.tika.parser.txt.TXTParser;
4748
import org.apache.tika.parser.txt.UniversalEncodingDetector;
@@ -55,7 +56,7 @@ public void testDefault() {
5556
assertTrue(detector instanceof CompositeEncodingDetector);
5657
List<EncodingDetector> detectors = ((CompositeEncodingDetector) detector).getDetectors();
5758
assertEquals(3, detectors.size());
58-
assertTrue(detectors.get(0) instanceof HtmlEncodingDetector);
59+
assertTrue(detectors.get(0) instanceof StandardHtmlEncodingDetector);
5960
assertTrue(detectors.get(1) instanceof UniversalEncodingDetector);
6061
assertTrue(detectors.get(2) instanceof Icu4jEncodingDetector);
6162
}

tika-parsers/tika-parser-integration-tests/src/test/resources/org/apache/tika/config/TIKA-2273-exclude-encoding-detector-default.xml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@
1919
<!-- Explicitly request default parsers -->
2020
<parsers/>
2121
<encodingDetectors>
22-
<!-- All detectors except HtmlEncodingDetector -->
22+
<!-- All detectors except StandardHtmlEncodingDetector -->
2323
<encodingDetector class="org.apache.tika.detect.DefaultEncodingDetector">
24-
<encodingDetector-exclude class="org.apache.tika.parser.html.HtmlEncodingDetector"/>
24+
<encodingDetector-exclude class="org.apache.tika.parser.html.charsetdetector.StandardHtmlEncodingDetector"/>
2525
</encodingDetector>
2626
<!-- One other detector, to check ordering -->
2727
<encodingDetector class="org.apache.tika.detect.NonDetectingEncodingDetector"/>

0 commit comments

Comments
 (0)