Skip to content

Commit e43e917

Browse files
qued and cragwolfe authored
feat: amazon linux 2 setup script (#350)
Added Amazon Linux 2 setup script. Also updated Ubuntu setup script to keep the scripts as aligned as possible. Co-authored-by: cragwolfe <[email protected]>
1 parent: 6be07a5 · commit: e43e917

File tree

4 files changed

+150
-36
lines changed

4 files changed

+150
-36
lines changed

Diff for: CHANGELOG.md

+2 -1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
## 0.5.3-dev5
1+
## 0.5.3
22

33
### Enhancements
44

@@ -10,6 +10,7 @@
1010

1111
* Add `--wikipedia-auto-suggest` argument to the ingest CLI to disable automatic redirection
1212
to pages with similar names.
13+
* Add setup script for Amazon Linux 2
1314
* Add optional `encoding` argument to the `partition_(text/email/html)` functions.
1415
* Added Google Drive connector for ingest cli.
1516
* Added Gitlab connector for ingest cli.

Diff for: scripts/setup_al2.sh

+111
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,111 @@
1+
#!/bin/bash
2+
set +u
3+
4+
if [ -z "$1" ]; then
5+
echo "When running this script, please supply the name of the user account for which to set up unstructured dependencies."
6+
echo "Ex: ${0} abertl"
7+
exit 1
8+
fi
9+
10+
set -eux
11+
12+
# Set package manager command for this distribution
13+
pac="yum"
14+
15+
# If we're not running as root, we want to prefix certain commands with sudo
16+
if [[ $(whoami) == 'root' ]]; then
17+
$pac update -y
18+
$pac install -y sudo
19+
sudo=''; else
20+
type -p sudo >/dev/null || (echo "Please have an administrator install sudo and add you to the sudo group before continuing." && exit 1)
21+
sudo='sudo'
22+
fi
23+
24+
# Set user account for which we're configuring the tools
25+
USER_ACCOUNT=$1
26+
27+
# Update existing packages
28+
$sudo $pac update -y
29+
30+
#### Utils
31+
# Prerequisites
32+
$sudo $pac install -y gcc wget tar curl make xz-devel
33+
# Install non-ancient version of sed
34+
wget http://ftp.gnu.org/gnu/sed/sed-4.9.tar.gz
35+
tar xvf sed-4.9.tar.gz
36+
cd sed-4.9/
37+
./configure && make && $sudo make install
38+
cd ..
39+
40+
#### Git
41+
# Install git
42+
$sudo $pac install -y git
43+
44+
#### Python
45+
# Install tools needed to build python
46+
$sudo $pac install -y bzip2 sqlite zlib-devel readline-devel sqlite-devel openssl-devel tk-devel libffi-devel bzip2-devel
47+
# Install pyenv
48+
sudo -u "$USER_ACCOUNT" -i <<'EOF'
49+
if [[ ! -d "$HOME"/.pyenv ]]; then
50+
cd $HOME
51+
curl https://pyenv.run | bash
52+
touch "$HOME"/.bashrc
53+
# Remove initialization lines from .bashrc if they are already there, so we don't duplicate them
54+
# shellcheck disable=SC2016
55+
sed -i '/export PYENV_ROOT="$HOME\/.pyenv"/d' "$HOME"/.bashrc
56+
# shellcheck disable=SC2016
57+
sed -i '/command -v pyenv >\/dev\/null || export PATH="$PYENV_ROOT\/bin:$PATH"/d' "$HOME"/.bashrc
58+
# shellcheck disable=SC2016
59+
sed -i '/eval "$(pyenv init -)"/d' "$HOME"/.bashrc
60+
# shellcheck disable=SC2016
61+
sed -i '/eval "$(pyenv virtualenv-init -)"/d' "$HOME"/.bashrc
62+
# Add initialization lines to .bashrc
63+
# shellcheck disable=SC2016
64+
cat <<'EOT' | cat - "$HOME"/.bashrc > temp && mv temp "$HOME"/.bashrc
65+
export PYENV_ROOT="$HOME/.pyenv"
66+
command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"
67+
eval "$(pyenv init -)"
68+
eval "$(pyenv virtualenv-init -)"
69+
EOT
70+
# install python
71+
source "$HOME"/.bashrc
72+
pyenv install 3.8.15
73+
fi
74+
EOF
75+
76+
#### OpenCV dependencies
77+
$sudo $pac install -y mesa-libGL
78+
79+
#### Poppler
80+
# Install poppler
81+
$sudo $pac install -y poppler-utils
82+
83+
#### Tesseract
84+
# Install dependencies for image and pdf manipulation
85+
$sudo $pac install -y opencv opencv-devel opencv-python perl-core clang libpng-devel libtiff-devel libwebp-devel libjpeg-turbo-devel git-core libtool pkgconfig xz
86+
# Install leptonica (tesseract dependency)
87+
wget https://github.com/DanBloomberg/leptonica/releases/download/1.75.1/leptonica-1.75.1.tar.gz
88+
tar -xzvf leptonica-1.75.1.tar.gz
89+
cd leptonica-1.75.1
90+
./configure && make && $sudo make install
91+
cd ..
92+
# Install autoconf-archive (tesseract dependency)
93+
wget http://mirror.squ.edu.om/gnu/autoconf-archive/autoconf-archive-2017.09.28.tar.xz
94+
tar -xvf autoconf-archive-2017.09.28.tar.xz
95+
cd autoconf-archive-2017.09.28
96+
./configure && make && $sudo make install
97+
$sudo cp m4/* /usr/share/aclocal
98+
cd ..
99+
# Install tesseract
100+
git clone --depth 1 https://github.com/tesseract-ocr/tesseract.git tesseract-ocr
101+
cd tesseract-ocr
102+
export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig
103+
./autogen.sh
104+
./configure && make && $sudo make install
105+
cd ..
106+
# Install tesseract languages
107+
git clone https://github.com/tesseract-ocr/tessdata.git
108+
$sudo cp tessdata/*.traineddata /usr/local/share/tessdata
109+
110+
#### libmagic
111+
$sudo $pac install -y file-devel

Diff for: scripts/setup_ubuntu.sh

+36 -34
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,13 @@
11
#!/bin/bash
2-
set +u -e
2+
set +u
33

44
if [ -z "$1" ]; then
55
echo "When running this script, please supply the name of the user account for which to set up unstructured dependencies."
66
echo "Ex: ${0} abertl"
77
exit 1
88
fi
99

10-
set -ux
11-
12-
# Set user account for which we're configuring the tools
13-
USER_ACCOUNT=$1
14-
USER_ACCOUNT_HOME=$(bash -c "cd ~$(printf %q "$USER_ACCOUNT") && pwd")
10+
set -eux
1511

1612
# Set package manager command for this distribution
1713
pac="apt"
@@ -25,6 +21,9 @@ if [[ $(whoami) == 'root' ]]; then
2521
sudo='sudo'
2622
fi
2723

24+
# Set user account for which we're configuring the tools
25+
USER_ACCOUNT=$1
26+
2827
# Update existing packages
2928
# Reconfigure the service that detects the need for service restarts from interactive mode (user
3029
# needs to manually confirm which services to restart) to automatic. If we don't do this we'll
@@ -36,42 +35,45 @@ if [[ -d /etc/needrestart/conf.d ]]; then
3635
fi
3736
$sudo $pac upgrade -y
3837

38+
#### Utils
39+
# Prerequisites
40+
$sudo env DEBIAN_FRONTEND="noninteractive" $pac install -y gcc wget tar curl make xz-utils build-essential tzdata
41+
3942
#### Git
4043
# Install git
4144
$sudo $pac install -y git
4245

4346
#### Python
4447
# Install tools needed to build python
45-
$sudo env DEBIAN_FRONTEND="noninteractive" $pac install -y curl gcc bzip2 sqlite zlib1g-dev libreadline-dev libsqlite3-dev libssl-dev tk-dev libffi-dev xz-utils make build-essential libbz2-dev wget llvm libncursesw5-dev libxml2-dev libxmlsec1-dev liblzma-dev
48+
$sudo $pac install -y bzip2 sqlite zlib1g-dev libreadline-dev libsqlite3-dev libssl-dev tk-dev libffi-dev libbz2-dev llvm libncursesw5-dev libxml2-dev libxmlsec1-dev liblzma-dev
4649
# Install pyenv
47-
if [[ ! -d $USER_ACCOUNT_HOME/.pyenv ]]; then
48-
sudo -u "$USER_ACCOUNT" -i <<'EOF'
49-
cd $HOME
50-
curl https://pyenv.run | bash
50+
sudo -u "$USER_ACCOUNT" -i <<'EOF'
51+
if [[ ! -d "$HOME"/.pyenv ]]; then
52+
cd $HOME
53+
curl https://pyenv.run | bash
54+
touch "$HOME"/.bashrc
55+
# Remove initialization lines from .bashrc if they are already there, so we don't duplicate them
56+
# shellcheck disable=SC2016
57+
sed -i '/export PYENV_ROOT="$HOME\/.pyenv"/d' "$HOME"/.bashrc
58+
# shellcheck disable=SC2016
59+
sed -i '/command -v pyenv >\/dev\/null || export PATH="$PYENV_ROOT\/bin:$PATH"/d' "$HOME"/.bashrc
60+
# shellcheck disable=SC2016
61+
sed -i '/eval "$(pyenv init -)"/d' "$HOME"/.bashrc
62+
# shellcheck disable=SC2016
63+
sed -i '/eval "$(pyenv virtualenv-init -)"/d' "$HOME"/.bashrc
64+
# Add initialization lines to .bashrc
65+
# shellcheck disable=SC2016
66+
cat <<'EOT' | cat - "$HOME"/.bashrc > temp && mv temp "$HOME"/.bashrc
67+
export PYENV_ROOT="$HOME/.pyenv"
68+
command -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"
69+
eval "$(pyenv init -)"
70+
eval "$(pyenv virtualenv-init -)"
71+
EOT
72+
# install python
73+
source "$HOME"/.bashrc
74+
pyenv install 3.8.15
75+
fi
5176
EOF
52-
# Remove initialization lines from .bashrc if they are already there, so we don't duplicate them
53-
# shellcheck disable=SC2016
54-
sed -i '/export PYENV_ROOT="$HOME\/.pyenv"/d' "$USER_ACCOUNT_HOME"/.bashrc
55-
# shellcheck disable=SC2016
56-
sed -i '/command -v pyenv >\/dev\/null || export PATH="$PYENV_ROOT\/bin:$PATH"/d' "$USER_ACCOUNT_HOME"/.bashrc
57-
# shellcheck disable=SC2016
58-
sed -i '/eval "$(pyenv init -)"/d' "$USER_ACCOUNT_HOME"/.bashrc
59-
# shellcheck disable=SC2016
60-
sed -i '/eval "$(pyenv virtualenv-init -)"/d' "$USER_ACCOUNT_HOME"/.bashrc
61-
# Add initialization lines to .bashrc
62-
# shellcheck disable=SC2016
63-
sed -i '1ieval "$(pyenv virtualenv-init -)"' "$USER_ACCOUNT_HOME"/.bashrc
64-
# shellcheck disable=SC2016
65-
sed -i '1ieval "$(pyenv init -)"' "$USER_ACCOUNT_HOME"/.bashrc
66-
# shellcheck disable=SC2016
67-
sed -i '1icommand -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"' "$USER_ACCOUNT_HOME"/.bashrc
68-
# shellcheck disable=SC2016
69-
sed -i '1iexport PYENV_ROOT="$HOME/.pyenv"' "$USER_ACCOUNT_HOME"/.bashrc
70-
# install python
71-
sudo -u "$USER_ACCOUNT" -i <<'EOF'
72-
pyenv install 3.8.15
73-
EOF
74-
fi
7577

7678
#### OpenCV dependencies
7779
$sudo $pac install -y libgl1

Diff for: unstructured/__version__.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "0.5.3-dev5" # pragma: no cover
1+
__version__ = "0.5.3" # pragma: no cover

0 commit comments

Comments
 (0)