9
9
10
10
11
11
def test_partition_xlsx_from_filename (filename = "example-docs/stanley-cups.xlsx" ):
12
- elements = partition_xlsx (filename = filename )
12
+ elements = partition_xlsx (filename = filename , include_header = False )
13
13
14
14
assert all (isinstance (element , Table ) for element in elements )
15
15
assert len (elements ) == 2
@@ -23,7 +23,7 @@ def test_partition_xlsx_from_filename(filename="example-docs/stanley-cups.xlsx")
23
23
24
24
25
25
def test_partition_xlsx_from_filename_with_emoji (filename = "example-docs/emoji.xlsx" ):
26
- elements = partition_xlsx (filename = filename )
26
+ elements = partition_xlsx (filename = filename , include_header = False )
27
27
assert all (isinstance (element , Table ) for element in elements )
28
28
assert len (elements ) == 1
29
29
assert clean_extra_whitespace (elements [0 ].text ) == "🤠😅"
@@ -32,16 +32,27 @@ def test_partition_xlsx_from_filename_with_emoji(filename="example-docs/emoji.xl
32
32
def test_partition_xlsx_from_filename_with_metadata_filename (
33
33
filename = "example-docs/stanley-cups.xlsx" ,
34
34
):
35
- elements = partition_xlsx (filename = filename , metadata_filename = "test" )
35
+ elements = partition_xlsx (filename = filename , metadata_filename = "test" , include_header = False )
36
36
37
37
assert all (isinstance (element , Table ) for element in elements )
38
38
assert clean_extra_whitespace (elements [0 ].text ) == EXPECTED_TEXT
39
39
assert elements [0 ].metadata .filename == "test"
40
40
41
41
42
+ def test_partition_xlsx_from_filename_with_header (filename = "example-docs/stanley-cups.xlsx" ):
43
+ elements = partition_xlsx (filename = filename , include_header = True )
44
+ assert all (isinstance (element , Table ) for element in elements )
45
+ assert len (elements ) == 2
46
+ assert (
47
+ clean_extra_whitespace (elements [0 ].text )
48
+ == "Stanley Cups Unnamed: 1 Unnamed: 2 " + EXPECTED_TEXT
49
+ )
50
+ assert "<thead>" in elements [0 ].metadata .text_as_html
51
+
52
+
42
53
def test_partition_xlsx_from_file (filename = "example-docs/stanley-cups.xlsx" ):
43
54
with open (filename , "rb" ) as f :
44
- elements = partition_xlsx (file = f )
55
+ elements = partition_xlsx (file = f , include_header = False )
45
56
46
57
assert all (isinstance (element , Table ) for element in elements )
47
58
assert len (elements ) == 2
@@ -55,15 +66,28 @@ def test_partition_xlsx_from_file(filename="example-docs/stanley-cups.xlsx"):
55
66
56
67
def test_partition_xlsx_from_file_with_metadata_filename (filename = "example-docs/stanley-cups.xlsx" ):
57
68
with open (filename , "rb" ) as f :
58
- elements = partition_xlsx (file = f , metadata_filename = "test" )
69
+ elements = partition_xlsx (file = f , metadata_filename = "test" , include_header = False )
59
70
60
71
assert all (isinstance (element , Table ) for element in elements )
61
72
assert clean_extra_whitespace (elements [0 ].text ) == EXPECTED_TEXT
62
73
assert elements [0 ].metadata .filename == "test"
63
74
64
75
76
+ def test_partition_xlsx_from_file_with_header (filename = "example-docs/stanley-cups.xlsx" ):
77
+ with open (filename , "rb" ) as f :
78
+ elements = partition_xlsx (file = f , include_header = True )
79
+
80
+ assert all (isinstance (element , Table ) for element in elements )
81
+ assert len (elements ) == 2
82
+ assert (
83
+ clean_extra_whitespace (elements [0 ].text )
84
+ == "Stanley Cups Unnamed: 1 Unnamed: 2 " + EXPECTED_TEXT
85
+ )
86
+ assert "<thead>" in elements [0 ].metadata .text_as_html
87
+
88
+
65
89
def test_partition_xlsx_filename_exclude_metadata (filename = "example-docs/stanley-cups.xlsx" ):
66
- elements = partition_xlsx (filename = filename , include_metadata = False )
90
+ elements = partition_xlsx (filename = filename , include_metadata = False , include_header = False )
67
91
68
92
assert all (isinstance (element , Table ) for element in elements )
69
93
assert len (elements ) == 2
@@ -78,7 +102,7 @@ def test_partition_xlsx_filename_exclude_metadata(filename="example-docs/stanley
78
102
79
103
def test_partition_xlsx_from_file_exclude_metadata (filename = "example-docs/stanley-cups.xlsx" ):
80
104
with open (filename , "rb" ) as f :
81
- elements = partition_xlsx (file = f , include_metadata = False )
105
+ elements = partition_xlsx (file = f , include_metadata = False , include_header = False )
82
106
83
107
assert all (isinstance (element , Table ) for element in elements )
84
108
assert len (elements ) == 2
0 commit comments