Skip to content

Commit dc4147d

Browse files
authored
feat: extract tables (#503)
Exposes table extraction through partition and partition_pdf.
1 parent 5d1e61c commit dc4147d

31 files changed

+233
-171
lines changed

Diff for: CHANGELOG.md

+6-7
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,4 @@
1-
## 0.5.15-dev2
2-
3-
### Features
4-
5-
* Adds support for extracting attachments from `.msg` files
6-
7-
## 0.5.14-dev1
1+
## 0.6.0
82

93
### Enhancements
104

@@ -17,8 +11,13 @@
1711

1812
### Features
1913

14+
* Table extraction is now possible for PDFs from `partition` and `partition_pdf`.
15+
* Adds support for extracting attachments from `.msg` files
16+
2017
### Fixes
2118

19+
* Adds an `ssl_verify` kwarg to `partition` and `partition_html` to enable turning off
20+
SSL verification for HTTP requests. SSL verification is on by default.
2221

2322
## 0.5.13
2423

Diff for: docs/requirements.txt

+5-5
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ alabaster==0.7.13
88
# via sphinx
99
babel==2.12.1
1010
# via sphinx
11-
beautifulsoup4==4.12.0
11+
beautifulsoup4==4.12.2
1212
# via furo
1313
certifi==2022.12.7
1414
# via
@@ -26,15 +26,15 @@ idna==3.4
2626
# via requests
2727
imagesize==1.4.1
2828
# via sphinx
29-
importlib-metadata==6.1.0
29+
importlib-metadata==6.5.0
3030
# via sphinx
3131
jinja2==3.1.2
3232
# via sphinx
3333
markupsafe==2.1.2
3434
# via jinja2
35-
packaging==23.0
35+
packaging==23.1
3636
# via sphinx
37-
pygments==2.14.0
37+
pygments==2.15.1
3838
# via
3939
# furo
4040
# sphinx
@@ -44,7 +44,7 @@ requests==2.28.2
4444
# via sphinx
4545
snowballstemmer==2.2.0
4646
# via sphinx
47-
soupsieve==2.4
47+
soupsieve==2.4.1
4848
# via beautifulsoup4
4949
sphinx==6.1.3
5050
# via

Diff for: docs/source/getting_started.rst

+1
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ These element objects represent different components of the source document. Cur
6969
* ``ListItem``
7070
* ``Title``
7171
* ``Address``
72+
* ``Table``
7273
* ``PageBreak``
7374
* ``CheckBox``
7475
* ``Image``

Diff for: requirements/base.txt

+6-6
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
#
77
anyio==3.6.2
88
# via httpcore
9-
argilla==1.5.0
9+
argilla==1.6.0
1010
# via unstructured (setup.py)
1111
backoff==2.2.1
1212
# via argilla
@@ -37,7 +37,7 @@ idna==3.4
3737
# anyio
3838
# requests
3939
# rfc3986
40-
importlib-metadata==6.1.0
40+
importlib-metadata==6.5.0
4141
# via markdown
4242
joblib==1.2.0
4343
# via nltk
@@ -62,19 +62,19 @@ olefile==0.46
6262
# via msg-parser
6363
openpyxl==3.1.2
6464
# via unstructured (setup.py)
65-
packaging==23.0
65+
packaging==23.1
6666
# via argilla
6767
pandas==1.5.3
6868
# via
6969
# argilla
7070
# unstructured (setup.py)
71-
pillow==9.4.0
71+
pillow==9.5.0
7272
# via
7373
# python-pptx
7474
# unstructured (setup.py)
7575
pydantic==1.10.7
7676
# via argilla
77-
pygments==2.14.0
77+
pygments==2.15.1
7878
# via rich
7979
pypandoc==1.11
8080
# via unstructured (setup.py)
@@ -117,7 +117,7 @@ wrapt==1.14.1
117117
# via
118118
# argilla
119119
# deprecated
120-
xlsxwriter==3.0.9
120+
xlsxwriter==3.1.0
121121
# via python-pptx
122122
zipp==3.15.0
123123
# via importlib-metadata

Diff for: requirements/build.txt

+5-5
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ alabaster==0.7.13
88
# via sphinx
99
babel==2.12.1
1010
# via sphinx
11-
beautifulsoup4==4.12.0
11+
beautifulsoup4==4.12.2
1212
# via furo
1313
certifi==2022.12.7
1414
# via
@@ -26,15 +26,15 @@ idna==3.4
2626
# via requests
2727
imagesize==1.4.1
2828
# via sphinx
29-
importlib-metadata==6.1.0
29+
importlib-metadata==6.5.0
3030
# via sphinx
3131
jinja2==3.1.2
3232
# via sphinx
3333
markupsafe==2.1.2
3434
# via jinja2
35-
packaging==23.0
35+
packaging==23.1
3636
# via sphinx
37-
pygments==2.14.0
37+
pygments==2.15.1
3838
# via
3939
# furo
4040
# sphinx
@@ -44,7 +44,7 @@ requests==2.28.2
4444
# via sphinx
4545
snowballstemmer==2.2.0
4646
# via sphinx
47-
soupsieve==2.4
47+
soupsieve==2.4.1
4848
# via beautifulsoup4
4949
sphinx==6.1.3
5050
# via

Diff for: requirements/dev.txt

+22-20
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@ arrow==1.2.3
2121
# via isoduration
2222
asttokens==2.2.1
2323
# via stack-data
24-
attrs==22.2.0
24+
attrs==23.1.0
2525
# via jsonschema
2626
backcall==0.2.0
2727
# via ipython
28-
beautifulsoup4==4.12.0
28+
beautifulsoup4==4.12.2
2929
# via nbconvert
3030
bleach==6.0.0
3131
# via nbconvert
@@ -39,7 +39,7 @@ click==8.1.3
3939
# via pip-tools
4040
comm==0.1.3
4141
# via ipykernel
42-
debugpy==1.6.6
42+
debugpy==1.6.7
4343
# via ipykernel
4444
decorator==5.1.1
4545
# via ipython
@@ -51,7 +51,7 @@ executing==1.2.0
5151
# via stack-data
5252
fastjsonschema==2.16.3
5353
# via nbformat
54-
filelock==3.10.7
54+
filelock==3.12.0
5555
# via virtualenv
5656
fqdn==1.5.1
5757
# via jsonschema
@@ -61,7 +61,7 @@ idna==3.4
6161
# via
6262
# anyio
6363
# jsonschema
64-
importlib-metadata==6.1.0
64+
importlib-metadata==6.5.0
6565
# via
6666
# jupyter-client
6767
# nbconvert
@@ -75,7 +75,7 @@ ipykernel==6.22.0
7575
# nbclassic
7676
# notebook
7777
# qtconsole
78-
ipython==8.11.0
78+
ipython==8.12.0
7979
# via
8080
# -r requirements/dev.in
8181
# ipykernel
@@ -106,7 +106,7 @@ jsonschema[format-nongpl]==4.17.3
106106
# nbformat
107107
jupyter==1.0.0
108108
# via -r requirements/dev.in
109-
jupyter-client==8.1.0
109+
jupyter-client==8.2.0
110110
# via
111111
# ipykernel
112112
# jupyter-console
@@ -152,11 +152,11 @@ matplotlib-inline==0.1.6
152152
# ipython
153153
mistune==2.0.5
154154
# via nbconvert
155-
nbclassic==0.5.3
155+
nbclassic==0.5.5
156156
# via notebook
157-
nbclient==0.7.2
157+
nbclient==0.7.3
158158
# via nbconvert
159-
nbconvert==7.2.10
159+
nbconvert==7.3.1
160160
# via
161161
# jupyter
162162
# jupyter-server
@@ -176,11 +176,11 @@ nest-asyncio==1.5.6
176176
# notebook
177177
nodeenv==1.7.0
178178
# via pre-commit
179-
notebook==6.5.3
179+
notebook==6.5.4
180180
# via jupyter
181181
notebook-shim==0.2.2
182182
# via nbclassic
183-
packaging==23.0
183+
packaging==23.1
184184
# via
185185
# build
186186
# ipykernel
@@ -196,15 +196,15 @@ pexpect==4.8.0
196196
# via ipython
197197
pickleshare==0.7.5
198198
# via ipython
199-
pip-tools==6.12.3
199+
pip-tools==6.13.0
200200
# via -r requirements/dev.in
201201
pkgutil-resolve-name==1.3.10
202202
# via jsonschema
203203
platformdirs==3.2.0
204204
# via
205205
# jupyter-core
206206
# virtualenv
207-
pre-commit==3.2.1
207+
pre-commit==3.2.2
208208
# via -r requirements/dev.in
209209
prometheus-client==0.16.0
210210
# via
@@ -215,7 +215,7 @@ prompt-toolkit==3.0.38
215215
# via
216216
# ipython
217217
# jupyter-console
218-
psutil==5.9.4
218+
psutil==5.9.5
219219
# via ipykernel
220220
ptyprocess==0.7.0
221221
# via
@@ -225,7 +225,7 @@ pure-eval==0.2.2
225225
# via stack-data
226226
pycparser==2.21
227227
# via cffi
228-
pygments==2.14.0
228+
pygments==2.15.1
229229
# via
230230
# ipython
231231
# jupyter-console
@@ -254,7 +254,7 @@ pyzmq==25.0.2
254254
# nbclassic
255255
# notebook
256256
# qtconsole
257-
qtconsole==5.4.1
257+
qtconsole==5.4.2
258258
# via jupyter
259259
qtpy==2.3.1
260260
# via qtconsole
@@ -279,7 +279,7 @@ six==1.16.0
279279
# rfc3339-validator
280280
sniffio==1.3.0
281281
# via anyio
282-
soupsieve==2.4
282+
soupsieve==2.4.1
283283
# via beautifulsoup4
284284
stack-data==0.6.2
285285
# via ipython
@@ -295,7 +295,7 @@ tomli==2.0.1
295295
# via
296296
# build
297297
# pyproject-hooks
298-
tornado==6.2
298+
tornado==6.3
299299
# via
300300
# ipykernel
301301
# jupyter-client
@@ -321,9 +321,11 @@ traitlets==5.9.0
321321
# nbformat
322322
# notebook
323323
# qtconsole
324+
typing-extensions==4.5.0
325+
# via ipython
324326
uri-template==1.2.0
325327
# via jsonschema
326-
virtualenv==20.21.0
328+
virtualenv==20.22.0
327329
# via pre-commit
328330
wcwidth==0.2.6
329331
# via prompt-toolkit

0 commit comments

Comments
 (0)