@@ -181,30 +181,35 @@ def partition_email(
181
181
else :
182
182
raise ValueError ("Only one of filename, file, or text can be specified." )
183
183
184
- content_map : Dict [str , str ] = {
185
- part .get_content_type (): part .get_payload () for part in msg .walk ()
186
- }
184
+ content_map : Dict [str , str ] = {}
185
+ for part in msg .walk ():
186
+ # NOTE(robinson) - content disposition is None for the content of the email itself.
187
+ # Other dispositions include "attachment" for attachments
188
+ if part .get_content_disposition () is not None :
189
+ continue
190
+ content_type = part .get_content_type ()
191
+ content_map [content_type ] = part .get_payload ()
187
192
188
193
content = content_map .get (content_source , "" )
189
194
if not content :
190
195
raise ValueError (f"{ content_source } content not found in email" )
191
196
192
- # NOTE(robinson) - In the .eml files, the HTML content gets stored in a format that
193
- # looks like the following, resulting in extraneous "=" characters in the output if
194
- # you don't clean it up
195
- # <ul> =
196
- # <li>Item 1</li>=
197
- # <li>Item 2<li>=
198
- # </ul>
199
- list_content = split_by_paragraph (content )
200
-
201
197
if content_source == "text/html" :
198
+ # NOTE(robinson) - In the .eml files, the HTML content gets stored in a format that
199
+ # looks like the following, resulting in extraneous "=" characters in the output if
200
+ # you don't clean it up
201
+ # <ul> =
202
+ # <li>Item 1</li>=
203
+ # <li>Item 2<li>=
204
+ # </ul>
205
+ list_content = content .split ("=\n " )
202
206
content = "" .join (list_content )
203
207
elements = partition_html (text = content )
204
208
for element in elements :
205
209
if isinstance (element , Text ):
206
210
element .apply (replace_mime_encodings )
207
211
elif content_source == "text/plain" :
212
+ list_content = split_by_paragraph (content )
208
213
elements = partition_text (text = content )
209
214
210
215
for idx , element in enumerate (elements ):
0 commit comments