Skip to content

Commit e500168

Browse files
authored
Merge pull request deanmalmgren#296 from jpweytjens/min_python_version
Sphinx only supports python 3.5+
2 parents cee7546 + ee8553b commit e500168

24 files changed

+141
-95
lines changed

Diff for: .travis.yml

+15-6
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
sudo: required
2-
dist: trusty
2+
dist: bionic
33

44
language: python
55
python:
66
- "2.7"
7-
- "3.4"
7+
- "3.7"
88

99
# install system dependencies here with apt-get.
1010
before_install:
@@ -13,15 +13,24 @@ before_install:
1313
# install python dependencies including this package in the travis
1414
# virtualenv
1515
install:
16-
- ./provision/python.sh
17-
- pip install .
16+
17+
- if [[ $TRAVIS_PYTHON_VERSION == 3.7 ]];
18+
then ./provision/python3.sh;
19+
fi
20+
- if [[ $TRAVIS_PYTHON_VERSION == 2.7 ]];
21+
then ./provision/python2.sh;
22+
fi
23+
- pip install .[pocketsphinx]
1824

1925
# commands to run the testing suite. if any of these fail, travis lets us know
2026
script:
2127
- cd tests && make && cd -
2228
- nosetests --with-coverage --cover-package=textract
23-
- pep8 textract/ bin/textract
24-
- cd docs && make html && cd -
29+
- cd tests && pytest && cd -
30+
- pycodestyle textract/ bin/textract
31+
- if [[ $TRAVIS_PYTHON_VERSION == 3.7 ]];
32+
then cd docs && make html && cd -;
33+
fi
2534

2635
# commands to run after the tests successfully complete
2736
after_success:

Diff for: README.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ Extract text from any document. No muss. No fuss.
2424
.. |Downloads| image:: https://img.shields.io/pypi/dm/textract.svg
2525
:target: https://warehouse.python.org/project/textract/
2626

27-
.. |Test Coverage| image:: https://coveralls.io/repos/deanmalmgren/textract/badge.png
28-
:target: https://coveralls.io/r/deanmalmgren/textract
27+
.. |Test Coverage| image:: https://coveralls.io/repos/github/deanmalmgren/textract/badge.svg?branch=master
28+
:target: https://coveralls.io/github/deanmalmgren/textract?branch=master
2929

3030
.. |Documentation Status| image:: https://readthedocs.org/projects/textract/badge/?version=latest
3131
:target: https://readthedocs.org/projects/textract/?badge=latest

Diff for: bin/textract

100755100644
+1
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,5 @@ def main():
2929
else:
3030
args.output.write(output)
3131

32+
3233
main()

Diff for: docs/conf.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858
# built documents.
5959
#
6060
# The short X.Y version.
61-
release = version = "1.6.1"
61+
release = version = "1.6.3"
6262

6363
# The language for content autogenerated by Sphinx. Refer to documentation
6464
# for a list of supported languages.
@@ -132,7 +132,7 @@
132132
# Add any paths that contain custom static files (such as style sheets) here,
133133
# relative to this directory. They are copied after the builtin static files,
134134
# so a file named "default.css" will overwrite the builtin "default.css".
135-
html_static_path = ['.static']
135+
html_static_path = []
136136

137137
# Add any extra paths that contain custom files (such as robots.txt or
138138
# .htaccess) here, relative to this directory. These files are copied

Diff for: docs/index.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ file types by either mentioning them on the `issue tracker
7474

7575
* ``.pptx`` via `python-pptx`_
7676

77-
* ``.ps`` via `ps2text`_
77+
* ``.ps`` via `ps2ascii`_
7878

7979
* ``.rtf`` via `unrtf`_
8080

@@ -96,7 +96,7 @@ file types by either mentioning them on the `issue tracker
9696
.. _pdfminer.six: https://github.com/goulu/pdfminer
9797
.. _pdftotext: http://poppler.freedesktop.org/
9898
.. _pocketsphinx: https://github.com/cmusphinx/pocketsphinx/
99-
.. _ps2text: http://pages.cs.wisc.edu/~ghost/doc/pstotext.htm
99+
.. _ps2ascii: https://www.ghostscript.com/doc/current/Use.htm
100100
.. _python-docx2txt: https://github.com/ankushshah89/python-docx2txt
101101
.. _python-pptx: https://python-pptx.readthedocs.org/en/latest/
102102
.. _SpeechRecognition: https://pypi.python.org/pypi/SpeechRecognition/

Diff for: provision/python.sh renamed to provision/python2.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@ fi
1212
pip install -U pip
1313

1414
# Install the requirements for this package as well as this module.
15-
pip install -r requirements/python-dev
15+
pip install -r requirements/python-dev2

Diff for: provision/python3.sh

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/bin/bash
2+
3+
# This needs to work for vagrant, Travis builds, and Docker builds.
4+
# in a python virtualenv. in the virtual machine provisioning,
5+
# we're passing the directory this should be run from. in travis-ci,
6+
# it's run from the root of the repository.
7+
if [ "$#" -eq 1 ]; then
8+
cd $1
9+
fi
10+
11+
# upgrade pip so we can use wheel downloads
12+
pip install -U pip
13+
14+
# Install the requirements for this package as well as this module.
15+
pip install -r requirements/python-dev3
16+
pip install -r requirements/python-doc

Diff for: requirements/debian

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# required packages
22
gcc
33
libpulse-dev
4+
libasound2-dev
45
libjpeg-dev
56
build-essential
67
git
@@ -9,6 +10,7 @@ make
910
# these packages are required by python-docx, which depends on lxml
1011
# and requires these things
1112
python-dev
13+
python-pip
1214
libxml2-dev
1315
libxslt1-dev
1416

@@ -19,7 +21,7 @@ antiword
1921
unrtf
2022

2123
# parse image files
22-
tesseract-ocr=3.03\*
24+
tesseract-ocr
2325
libjpeg-dev
2426

2527
# parse pdfs

Diff for: requirements/python

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# package in order for it to properly work.
33

44
argcomplete==1.10.0
5-
beautifulsoup4==4.7.1
5+
beautifulsoup4==4.8.0
66
chardet==3.0.4
77
docx2txt==0.8
88
EbookLib==0.17.1

Diff for: requirements/python-dev renamed to requirements/python-dev2

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
# documentation builds (python-doc)
44

55
-r python
6-
-r python-doc
76

87
# needed for tests/run.py script to read .travis.yml file
9-
coveralls==1.8.1
8+
coveralls==1.8.2
109
nose==1.3.7
11-
pep8==1.7.1
10+
pycodestyle==2.5.0
1211
PyYAML==5.1.1
1312
requests==2.22.0
13+
pytest==4.6
1414

1515
# needed for managing versions
1616
bumpversion==0.5.3

Diff for: requirements/python-dev3

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# This includes all packages that are used in development, including all
2+
# packages that are required by textract itself (python), packages for
3+
# documentation builds (python-doc)
4+
5+
-r python
6+
7+
# needed for tests/run.py script to read .travis.yml file
8+
coveralls==1.8.2
9+
nose==1.3.7
10+
pycodestyle==2.5.0
11+
PyYAML==5.1.1
12+
pytest==5.0.1
13+
requests==2.22.0
14+
15+
# needed for managing versions
16+
bumpversion==0.5.3

Diff for: setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 1.6.2
2+
current_version = 1.6.3
33
commit = True
44
tag = True
55

Diff for: setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def parse_requirements(requirements_filename):
4242

4343
setup(
4444
name=textract.__name__,
45-
version="1.6.2",
45+
version="1.6.3",
4646
description="extract text from any document. no muss. no fuss.",
4747
long_description=long_description,
4848
url=github_url,

Diff for: tests/Makefile

+5-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ TARGETS = pdf/ocr_text.txt \
1111
png/raw_text.txt png/standardized_text.txt \
1212
gif/raw_text.txt gif/standardized_text.txt \
1313
jpg/raw_text.txt jpg/standardized_text.txt \
14-
tiff/raw_text.txt tiff/standardized_text.txt
14+
tiff/raw_text.txt tiff/standardized_text.txt \
15+
ps/raw_text.txt
1516

1617
all: $(TARGETS)
1718

@@ -27,6 +28,9 @@ pdf/ocr_text.txt: pdf/ocr_text.pdf
2728
cat pdf-ocr-text*.txt > $@
2829
rm -f pdf-ocr-text*
2930

31+
ps/raw_text.txt: ps/raw_text.ps
32+
ps2ascii $< > $@
33+
3034
# simple pattern rule for creating standard issue tesseract files for different
3135
# filetypes. the `g` shell variable is the path to the file without the
3236
# extension (e.g. g=png/raw_text)

0 commit comments

Comments
 (0)