@@ -45,16 +45,47 @@ def test_detect_filetype_from_filename(file, expected):
45
45
("fake-text.txt" , FileType .TXT ),
46
46
("fake-email.eml" , FileType .EML ),
47
47
("factbook.xml" , FileType .XML ),
48
- ("example-10k.html" , FileType .XML ),
48
+ # NOTE(robinson) - For this document, some operating systems return
49
+ # */xml and others return */html, so either result is accepted depending on the OS.
50
+ ("example-10k.html" , [FileType .HTML , FileType .XML ]),
49
51
("fake-html.html" , FileType .HTML ),
50
52
("fake-excel.xlsx" , FileType .XLSX ),
51
53
("fake-power-point.pptx" , FileType .PPTX ),
52
54
],
53
55
)
54
56
def test_detect_filetype_from_file(file, expected):
    """Check that an opened example document is detected as an accepted type.

    ``expected`` is either a single file type or a list of acceptable file
    types; a single value is wrapped in a list so both forms are checked
    with the same membership assertion.
    """
    if isinstance(expected, list):
        acceptable = expected
    else:
        acceptable = [expected]

    path = os.path.join(EXAMPLE_DOCS_DIRECTORY, file)
    with open(path, "rb") as stream:
        detected = detect_filetype(file=stream)
        assert detected in acceptable
61
+
62
+
63
def test_detect_xml_application_xml(monkeypatch):
    """A ``fake.xml`` path is detected as XML when ``magic.from_file`` is
    patched to report ``application/xml``."""

    def _report_application_xml(*args, **kwargs):
        # Stand-in for magic.from_file: ignore all inputs, return a fixed type.
        return "application/xml"

    monkeypatch.setattr(magic, "from_file", _report_application_xml)
    xml_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.xml")
    assert detect_filetype(filename=xml_path) == FileType.XML
68
+
69
+
70
def test_detect_xml_text_xml(monkeypatch):
    """A ``fake.xml`` path is detected as XML when ``magic.from_file`` is
    patched to report ``text/xml``."""

    def _report_text_xml(*args, **kwargs):
        # Stand-in for magic.from_file: ignore all inputs, return a fixed type.
        return "text/xml"

    monkeypatch.setattr(magic, "from_file", _report_text_xml)
    xml_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.xml")
    detected = detect_filetype(filename=xml_path)
    assert detected == FileType.XML
75
+
76
+
77
def test_detect_html_application_xml(monkeypatch):
    """A ``fake.html`` path is detected as HTML even when ``magic.from_file``
    is patched to report ``application/xml``."""

    def _report_application_xml(*args, **kwargs):
        # Stand-in for magic.from_file: ignore all inputs, return a fixed type.
        return "application/xml"

    monkeypatch.setattr(magic, "from_file", _report_application_xml)
    html_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.html")
    assert detect_filetype(filename=html_path) == FileType.HTML
82
+
83
+
84
def test_detect_html_text_xml(monkeypatch):
    """A ``fake.html`` path is detected as HTML even when ``magic.from_file``
    is patched to report ``text/xml``."""

    def _report_text_xml(*args, **kwargs):
        # Stand-in for magic.from_file: ignore all inputs, return a fixed type.
        return "text/xml"

    monkeypatch.setattr(magic, "from_file", _report_text_xml)
    html_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "fake.html")
    detected = detect_filetype(filename=html_path)
    assert detected == FileType.HTML
58
89
59
90
60
91
def test_detect_docx_filetype_application_octet_stream (monkeypatch ):
0 commit comments