Skip to content

Commit 2a1b6a5

Browse files
Modify default encoding detector
Replace HtmlEncodingDetector with StandardHtmlEncodingDetector. Adjust some test cases.
1 parent 5021f34 commit 2a1b6a5

File tree

3 files changed

+5
-4
lines changed

3 files changed

+5
-4
lines changed

tika-parsers/src/main/resources/META-INF/services/org.apache.tika.detect.EncodingDetector

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,6 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16-
org.apache.tika.parser.html.HtmlEncodingDetector
16+
org.apache.tika.parser.html.charsetdetector.StandardHtmlEncodingDetector
1717
org.apache.tika.parser.txt.UniversalEncodingDetector
1818
org.apache.tika.parser.txt.Icu4jEncodingDetector

tika-parsers/src/test/java/org/apache/tika/config/TikaEncodingDetectorTest.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@
3939
import org.apache.tika.parser.Parser;
4040
import org.apache.tika.parser.ParserDecorator;
4141
import org.apache.tika.parser.html.HtmlEncodingDetector;
42+
import org.apache.tika.parser.html.charsetdetector.StandardHtmlEncodingDetector;
4243
import org.apache.tika.parser.txt.Icu4jEncodingDetector;
4344
import org.apache.tika.parser.txt.TXTParser;
4445
import org.apache.tika.parser.txt.UniversalEncodingDetector;
@@ -52,7 +53,7 @@ public void testDefault() {
5253
assertTrue(detector instanceof CompositeEncodingDetector);
5354
List<EncodingDetector> detectors = ((CompositeEncodingDetector) detector).getDetectors();
5455
assertEquals(3, detectors.size());
55-
assertTrue(detectors.get(0) instanceof HtmlEncodingDetector);
56+
assertTrue(detectors.get(0) instanceof StandardHtmlEncodingDetector);
5657
assertTrue(detectors.get(1) instanceof UniversalEncodingDetector);
5758
assertTrue(detectors.get(2) instanceof Icu4jEncodingDetector);
5859
}

tika-parsers/src/test/resources/org/apache/tika/config/TIKA-2273-exclude-encoding-detector-default.xml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,9 @@
1919
<!-- Explicitly request default parsers -->
2020
<parsers/>
2121
<encodingDetectors>
22-
<!-- All detectors except HtmlEncodingDetector -->
22+
<!-- All detectors except StandardHtmlEncodingDetector -->
2323
<encodingDetector class="org.apache.tika.detect.DefaultEncodingDetector">
24-
<encodingDetector-exclude class="org.apache.tika.parser.html.HtmlEncodingDetector"/>
24+
<encodingDetector-exclude class="org.apache.tika.parser.html.charsetdetector.StandardHtmlEncodingDetector"/>
2525
</encodingDetector>
2626
<!-- One other detector, to check ordering -->
2727
<encodingDetector class="org.apache.tika.detect.NonDetectingEncodingDetector"/>

0 commit comments

Comments
 (0)