File tree 24 files changed +141
-95
lines changed
24 files changed +141
-95
lines changed Original file line number Diff line number Diff line change 1
1
sudo : required
2
- dist : trusty
2
+ dist : bionic
3
3
4
4
language : python
5
5
python :
6
6
- " 2.7"
7
- - " 3.4 "
7
+ - " 3.7 "
8
8
9
9
# install system dependencies here with apt-get.
10
10
before_install :
@@ -13,15 +13,24 @@ before_install:
13
13
# install python dependencies including this package in the travis
14
14
# virtualenv
15
15
install :
16
- - ./provision/python.sh
17
- - pip install .
16
+
17
+ - if [[ $TRAVIS_PYTHON_VERSION == 3.7 ]];
18
+ then ./provision/python3.sh;
19
+ fi
20
+ - if [[ $TRAVIS_PYTHON_VERSION == 2.7 ]];
21
+ then ./provision/python2.sh;
22
+ fi
23
+ - pip install .[pocketsphinx]
18
24
19
25
# commands to run the testing suite. if any of these fail, travis lets us know
20
26
script :
21
27
- cd tests && make && cd -
22
28
- nosetests --with-coverage --cover-package=textract
23
- - pep8 textract/ bin/textract
24
- - cd docs && make html && cd -
29
+ - cd tests && pytest && cd -
30
+ - pycodestyle textract/ bin/textract
31
+ - if [[ $TRAVIS_PYTHON_VERSION == 3.7 ]];
32
+ then cd docs && make html && cd -;
33
+ fi
25
34
26
35
# commands to run after the tests successfully complete
27
36
after_success :
Original file line number Diff line number Diff line change @@ -24,8 +24,8 @@ Extract text from any document. No muss. No fuss.
24
24
.. |Downloads | image :: https://img.shields.io/pypi/dm/textract.svg
25
25
:target: https://warehouse.python.org/project/textract/
26
26
27
- .. |Test Coverage | image :: https://coveralls.io/repos/deanmalmgren/textract/badge.png
28
- :target: https://coveralls.io/r /deanmalmgren/textract
27
+ .. |Test Coverage | image :: https://coveralls.io/repos/github/ deanmalmgren/textract/badge.svg?branch=master
28
+ :target: https://coveralls.io/github /deanmalmgren/textract?branch=master
29
29
30
30
.. |Documentation Status | image :: https://readthedocs.org/projects/textract/badge/?version=latest
31
31
:target: https://readthedocs.org/projects/textract/?badge=latest
Original file line number Diff line number Diff line change @@ -29,4 +29,5 @@ def main():
29
29
else :
30
30
args .output .write (output )
31
31
32
+
32
33
main ()
Original file line number Diff line number Diff line change 58
58
# built documents.
59
59
#
60
60
# The short X.Y version.
61
- release = version = "1.6.1 "
61
+ release = version = "1.6.3 "
62
62
63
63
# The language for content autogenerated by Sphinx. Refer to documentation
64
64
# for a list of supported languages.
132
132
# Add any paths that contain custom static files (such as style sheets) here,
133
133
# relative to this directory. They are copied after the builtin static files,
134
134
# so a file named "default.css" will overwrite the builtin "default.css".
135
- html_static_path = ['.static' ]
135
+ html_static_path = []
136
136
137
137
# Add any extra paths that contain custom files (such as robots.txt or
138
138
# .htaccess) here, relative to this directory. These files are copied
Original file line number Diff line number Diff line change @@ -74,7 +74,7 @@ file types by either mentioning them on the `issue tracker
74
74
75
75
* ``.pptx `` via `python-pptx `_
76
76
77
- * ``.ps `` via `ps2text `_
77
+ * ``.ps `` via `ps2ascii `_
78
78
79
79
* ``.rtf `` via `unrtf `_
80
80
@@ -96,7 +96,7 @@ file types by either mentioning them on the `issue tracker
96
96
.. _pdfminer.six : https://github.com/goulu/pdfminer
97
97
.. _pdftotext : http://poppler.freedesktop.org/
98
98
.. _pocketsphinx : https://github.com/cmusphinx/pocketsphinx/
99
- .. _ ps2text : http ://pages.cs.wisc.edu/~ghost/ doc/pstotext .htm
99
+ .. _ ps2ascii : https ://www.ghostscript.com/ doc/current/Use .htm
100
100
.. _python-docx2txt : https://github.com/ankushshah89/python-docx2txt
101
101
.. _python-pptx : https://python-pptx.readthedocs.org/en/latest/
102
102
.. _SpeechRecognition : https://pypi.python.org/pypi/SpeechRecognition/
Original file line number Diff line number Diff line change 12
12
pip install -U pip
13
13
14
14
# Install the requirements for this package as well as this module.
15
- pip install -r requirements/python-dev
15
+ pip install -r requirements/python-dev2
Original file line number Diff line number Diff line change
1
+ #! /bin/bash
2
+
3
+ # This needs to work for vagrant, Travis builds, and Docker builds.
4
+ # in a python virtualenv. in the virtual machine provisioning,
5
+ # we're passing the directory this should be run from. in travis-ci,
6
+ # it's run from the root of the repository.
7
+ if [ " $# " -eq 1 ]; then
8
+ cd $1
9
+ fi
10
+
11
+ # upgrade pip so we can use wheel downloads
12
+ pip install -U pip
13
+
14
+ # Install the requirements for this package as well as this module.
15
+ pip install -r requirements/python-dev3
16
+ pip install -r requirements/python-doc
Original file line number Diff line number Diff line change 1
1
# required packages
2
2
gcc
3
3
libpulse-dev
4
+ libasound2-dev
4
5
libjpeg-dev
5
6
build-essential
6
7
git
9
10
# these packages are required by python-docx, which depends on lxml
10
11
# and requires these things
11
12
python-dev
13
+ python-pip
12
14
libxml2-dev
13
15
libxslt1-dev
14
16
@@ -19,7 +21,7 @@ antiword
19
21
unrtf
20
22
21
23
# parse image files
22
- tesseract-ocr=3.03\*
24
+ tesseract-ocr
23
25
libjpeg-dev
24
26
25
27
# parse pdfs
Original file line number Diff line number Diff line change 2
2
# package in order for it to properly work.
3
3
4
4
argcomplete==1.10.0
5
- beautifulsoup4==4.7.1
5
+ beautifulsoup4==4.8.0
6
6
chardet==3.0.4
7
7
docx2txt==0.8
8
8
EbookLib==0.17.1
Original file line number Diff line number Diff line change 3
3
# documentation builds (python-doc)
4
4
5
5
-r python
6
- -r python-doc
7
6
8
7
# needed for tests/run.py script to read .travis.yml file
9
- coveralls==1.8.1
8
+ coveralls==1.8.2
10
9
nose==1.3.7
11
- pep8==1.7.1
10
+ pycodestyle==2.5.0
12
11
PyYAML==5.1.1
13
12
requests==2.22.0
13
+ pytest==4.6
14
14
15
15
# needed for managing versions
16
16
bumpversion==0.5.3
Original file line number Diff line number Diff line change
1
+ # This includes all packages that are used in development, including all
2
+ # packages that are required by textract itself (python), packages for
3
+ # documentation builds (python-doc)
4
+
5
+ -r python
6
+
7
+ # needed for tests/run.py script to read .travis.yml file
8
+ coveralls==1.8.2
9
+ nose==1.3.7
10
+ pycodestyle==2.5.0
11
+ PyYAML==5.1.1
12
+ pytest==5.0.1
13
+ requests==2.22.0
14
+
15
+ # needed for managing versions
16
+ bumpversion==0.5.3
Original file line number Diff line number Diff line change 1
1
[bumpversion]
2
- current_version = 1.6.2
2
+ current_version = 1.6.3
3
3
commit = True
4
4
tag = True
5
5
Original file line number Diff line number Diff line change @@ -42,7 +42,7 @@ def parse_requirements(requirements_filename):
42
42
43
43
setup (
44
44
name = textract .__name__ ,
45
- version = "1.6.2 " ,
45
+ version = "1.6.3 " ,
46
46
description = "extract text from any document. no muss. no fuss." ,
47
47
long_description = long_description ,
48
48
url = github_url ,
Original file line number Diff line number Diff line change @@ -11,7 +11,8 @@ TARGETS = pdf/ocr_text.txt \
11
11
png/raw_text.txt png/standardized_text.txt \
12
12
gif/raw_text.txt gif/standardized_text.txt \
13
13
jpg/raw_text.txt jpg/standardized_text.txt \
14
- tiff/raw_text.txt tiff/standardized_text.txt
14
+ tiff/raw_text.txt tiff/standardized_text.txt \
15
+ ps/raw_text.txt
15
16
16
17
all : $(TARGETS )
17
18
@@ -27,6 +28,9 @@ pdf/ocr_text.txt: pdf/ocr_text.pdf
27
28
cat pdf-ocr-text* .txt > $@
28
29
rm -f pdf-ocr-text*
29
30
31
+ ps/raw_text.txt : ps/raw_text.ps
32
+ ps2ascii $< > $@
33
+
30
34
# simple pattern rule for creating standard issue tesseract files for different
31
35
# filetypes. the `g` shell variable is the path to the file without the
32
36
# extension (e.g. g=png/raw_text)
You can’t perform that action at this time.
0 commit comments