Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/python-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ jobs:
with:
submodules: 'recursive'

- name: Set up Python 3.9
- name: Set up Python 3.13
uses: actions/setup-python@v5
with:
python-version: "3.9"
python-version: "3.13"

- name: Upgrade Pip
run: python -m pip install --upgrade pip
Expand Down
2 changes: 1 addition & 1 deletion docker/docker-development.sh
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ else
cp ../VERSION search-api
cp ../BUILD search-api

docker compose -f docker-compose.yml -f docker-compose.development.yml -p search-api build
docker compose -f docker-compose.yml -f docker-compose.development.yml -p search-api build --no-cache
elif [ "$1" = "start" ]; then
docker compose -f docker-compose.yml -f docker-compose.development.yml -p search-api up -d
elif [ "$1" = "stop" ]; then
Expand Down
73 changes: 46 additions & 27 deletions docker/search-api/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Parent image
FROM hubmap/api-base-image:1.1.0
FROM hubmap/api-base-image:1.2.0

LABEL description="HuBMAP Search API Service"

Expand All @@ -13,45 +13,64 @@ WORKDIR /usr/src/app
# Copy from host to image
COPY . .

# http://nginx.org/en/linux_packages.html#RHEL-CentOS
# Set up the yum repository to install the latest mainline version of Nginx
RUN echo $'[nginx-mainline]\n\
name=nginx mainline repo\n\
baseurl=http://nginx.org/packages/mainline/centos/$releasever/$basearch/\n\
gpgcheck=1\n\
enabled=0\n\
gpgkey=https://nginx.org/keys/nginx_signing.key\n\
module_hotfixes=true\n'\
>> /etc/yum.repos.d/nginx.repo
# Set up the repository file for the stable version of
# nginx which dnf should use (in the legacy "yum" location.)
RUN set -eux && \
cat <<'EOF' > /etc/yum.repos.d/nginx.repo
[nginx-stable]
name=nginx stable repo
baseurl=http://nginx.org/packages/centos/$releasever/$basearch/
gpgcheck=1
enabled=1
gpgkey=https://nginx.org/keys/nginx_signing.key
module_hotfixes=true
EOF

# Reduce the number of layers in image by minimizing the number of separate RUN commands
# 1 - Install the prerequisites
# 2 - By default, the repository for stable nginx packages is used. We would like to use mainline nginx packages
# 3 - Install nginx (using the custom yum repo specified earlier)
# 2 - By default, the repository for stable nginx packages is used.
# 3 - Install nginx (using the custom dnf/yum repo specified earlier)
# 4 - Remove the default nginx config file
# 5 - Overwrite the nginx.conf with ours to run nginx as non-root
# 6 - Remove the nginx directory copied from host machine (nginx/conf.d gets mounted to the container)
# 7 - Upgrade pip (the one installed in base image may be old) and install flask app dependencies (pip3 also works)
# 7 - Upgrade pip (the one installed in base image may be old) and install service requirements.txt packages
# 8 - Make the start script executable
# 9 - Clean all yum cache
RUN yum install -y yum-utils && \
yum-config-manager --enable nginx-mainline && \
yum install -y nginx && \
rm /etc/nginx/conf.d/default.conf && \
mv nginx/nginx.conf /etc/nginx/nginx.conf && \
rm -rf nginx && \
pip install -r src/requirements.txt && \
chmod +x start.sh && \
yum clean all
# 9 - Clean the dnf/yum cache and other locations to reduce Docker Image layer size.
# Assume the base image has upgraded dnf and installed its dnf-plugins-core

RUN dnf install --assumeyes nginx && \
# Push aside nginx default.conf files that may exist on the system
[ ! -f /etc/nginx/conf.d/default.conf ] || mv /etc/nginx/conf.d/default.conf /tmp/etc_nginx_conf.d_default.conf.ORIGINAL && \
[ ! -f /etc/nginx/nginx.conf ] || mv /etc/nginx/nginx.conf /tmp/etc_nginx_nginx.conf.ORIGINAL && \
# Install the nginx default.conf file just installed in WORKDIR
mv nginx/nginx.conf /etc/nginx/nginx.conf && \
# Clean up the nginx install directory in WORKDIR
[ ! -d nginx ] || mv nginx /tmp/nginx_from_WORKDIR && \
# Push aside the verification file from the base image which will
# no longer report correctly once uWSGI is started for the service.
[ ! -f /tmp/verify_uwsgi.sh ] || mv /tmp/verify_uwsgi.sh /tmp/verify_uwsgi.sh.ORIGINAL && \
# Install the requirements.txt file for the service
pip3.13 install --no-cache-dir --upgrade pip -r src/requirements.txt && \
dnf autoremove -y && \
# Make the script referenced in the CMD directive below executable.
chmod a+x start.sh && \
# Clean up artifacts to slim down this layer of the Docker Image
dnf clean all && \
rm -rf /var/cache/dnf \
/var/log/dnf \
/var/log/yum \
/root/.cache

# The EXPOSE instruction informs Docker that the container listens on the specified network ports at runtime.
# EXPOSE does not make the ports of the container accessible to the host.
# Here 5000 is for the uwsgi socket, 8080 for nginx
EXPOSE 5000 8080

# Set an entrypoint
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh
# Set an entrypoint by moving the file copied into the WORKDIR to
# the location referenced by the ENTRYPOINT directive below, and
# make it executable.
RUN mv entrypoint.sh /usr/local/bin/entrypoint.sh && \
chmod a+x /usr/local/bin/entrypoint.sh

ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]

Expand Down
2 changes: 1 addition & 1 deletion docker/search-api/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
nginx -g 'daemon off;' &

# Start uwsgi and keep it running in foreground
uwsgi --ini /usr/src/app/src/uwsgi.ini
/usr/local/python3.13/bin/uwsgi --ini /usr/src/app/src/uwsgi.ini
2 changes: 1 addition & 1 deletion scripts/fresh_indices/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ The most recent operation will have the maximum key value, and hopefully will co

The results of commands which "do nothing" are written to the file as well i.e. when the 'catch-up' command is executed, but no entities are determined to need re-indexing.

Because of initial use of filenames, implicity assumption that one 'create' command is executed per day. Otherwise, need to clean up files & indices before trying another 'create' on the same day.
As designed for initial release, filenames implicitly assume one 'create' command is executed per day. Otherwise, files & indices need to be cleaned up before trying another 'create' on the same day.

Each op_data*.json file in exec_info should have the operation data for a 'create' command at entry '0'. Currently, all other entries will be for subsequent 'catch-up' operations.

Expand Down
18 changes: 9 additions & 9 deletions scripts/fresh_indices/es_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def get_document_agg_value(self, index_name, field_name, agg_name_enum: AggQuery
raise Exception(f"agg_name_enum='{agg_name_enum}' is not a supported aggregation.")

headers = {'Content-Type': 'application/json'}
agg_field_query = f'{{ "aggs": {{"agg_query_result": {{"{agg_name_enum}": {{"field": "{field_name}"}}}}}}}}'
agg_field_query = f'{{ "aggs": {{"agg_query_result": {{"{agg_name_enum.value}": {{"field": "{field_name}"}}}}}}}}'
try:
rspn = requests.post(f"{self.elasticsearch_url}/{index_name}/_search?size=0"
,headers=headers
Expand Down Expand Up @@ -205,18 +205,18 @@ def empty_index(self, index_name):
# e.g. PUT your_index/_settings {"index": {"blocks.read_only": false}}
# https://opensearch.org/docs/latest/api-reference/cluster-api/cluster-settings/
# https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules-blocks.html
def set_index_block(self, index_name, block_name):
if block_name not in IndexBlockType:
raise ValueError(f"'{block_name}' is not a block name supported by IndexBlockType")
def set_index_block(self, index_name: str, block_type_enum: IndexBlockType) -> None:
if block_type_enum not in IndexBlockType:
raise ValueError(f"'{block_type_enum}' is not a block name supported by IndexBlockType")
try:
if block_name is IndexBlockType.NONE:
if block_type_enum is IndexBlockType.NONE:
headers = {'Content-Type': 'application/json'}
payload_json = '{"index": {"blocks.write": false, "blocks.read_only": false, "blocks.read_only_allow_delete": false}}'
rspn = requests.put(url=f"{self.elasticsearch_url}/{index_name}/_settings"
,headers=headers
,data=payload_json)
else:
rspn = requests.put(url=f"{self.elasticsearch_url}/{index_name}/_block/{block_name}")
rspn = requests.put(url=f"{self.elasticsearch_url}/{index_name}/_block/{block_type_enum.value}")
except Exception as e:
msg = "Exception encountered during executing ESManager.set_index_block()"
# Log the full stack trace, prepend a line with our message
Expand All @@ -233,12 +233,12 @@ def set_index_block(self, index_name, block_name):
# "blocked": true
# }]
# }
logger.info(f"Set '{block_name}' block on index: {index_name}")
logger.info(f"Set '{block_type_enum.value}' block on index: {index_name}")
return
else:
logger.error(f"Failed to set '{block_name}' block on index: {index_name}")
logger.error(f"Failed to set '{block_type_enum.value}' block on index: {index_name}")
logger.error(f"Error Message: {rspn.text}")
raise Exception(f"Failed to set '{block_name}' block on"
raise Exception(f"Failed to set '{block_type_enum.value}' block on"
f" index: {index_name}, with"
f" status_code {rspn.status_code}. See logs.")

Expand Down
25 changes: 13 additions & 12 deletions scripts/fresh_indices/fresh_indices.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ def get_translator():

a_translator = Translator(INDICES, appcfg['APP_CLIENT_ID'], appcfg['APP_CLIENT_SECRET'], token,
appcfg['ONTOLOGY_API_BASE_URL'])
a_translator.log_configuration()

# Skip the uuids comparision step that is only needed for live /reindex-all PUT call
a_translator.skip_comparision = True
Expand Down Expand Up @@ -350,12 +351,12 @@ def swap_index_names_per_strategy(es_mgr:ESManager, fill_strategy:FillStrategyTy
flush_index=destination_index.replace('fill','flush')

# Block writing on the indices, even though services which write to them should probably be down.
logger.debug(f"Set {IndexBlockType.WRITE} block on source_index={source_index}.")
logger.debug(f"Set {IndexBlockType.WRITE.value} block on source_index={source_index}.")
es_mgr.set_index_block(index_name=source_index
, block_name=IndexBlockType.WRITE)
logger.debug(f"Set {IndexBlockType.WRITE} block on destination_index={destination_index}.")
, block_type_enum=IndexBlockType.WRITE)
logger.debug(f"Set {IndexBlockType.WRITE.value} block on destination_index={destination_index}.")
es_mgr.set_index_block(index_name=destination_index
, block_name=IndexBlockType.WRITE)
, block_type_enum=IndexBlockType.WRITE)
# Make sure the source_index health is "green" before proceeding.
es_mgr.wait_until_index_green(index_name=source_index
,wait_in_secs=30)
Expand All @@ -370,9 +371,9 @@ def swap_index_names_per_strategy(es_mgr:ESManager, fill_strategy:FillStrategyTy
es_mgr.wait_until_index_green(index_name=flush_index
,wait_in_secs=30)
logger.debug(f"Health of flush_index={flush_index} is green.")
logger.debug(f"Set {IndexBlockType.NONE} block on source_index={source_index}.")
logger.debug(f"Set {IndexBlockType.NONE.value} block on source_index={source_index}.")
es_mgr.set_index_block(index_name=source_index
, block_name=IndexBlockType.NONE)
, block_type_enum=IndexBlockType.NONE)
es_mgr.delete_index(index_name=source_index)
logger.debug(f"Deleted source_index={source_index}.")
op_data_supplement['golive']['swap_info'].append(f"Deleted {source_index}")
Expand All @@ -387,21 +388,21 @@ def swap_index_names_per_strategy(es_mgr:ESManager, fill_strategy:FillStrategyTy
es_mgr.wait_until_index_green(index_name=source_index
,wait_in_secs=30)
logger.debug(f"Health of source_index={source_index} is green.")
logger.debug(f"Set {IndexBlockType.NONE} block on destination_index={destination_index}.")
logger.debug(f"Set {IndexBlockType.NONE.value} block on destination_index={destination_index}.")
es_mgr.set_index_block(index_name=destination_index
, block_name=IndexBlockType.NONE)
, block_type_enum=IndexBlockType.NONE)
es_mgr.delete_index(index_name=destination_index)
logger.debug(f"Deleted destination_index={destination_index}.")
op_data_supplement['golive']['swap_info'].append(f"Deleted {destination_index}")

# Assure that the index which will be actively used by Search API and the
# backup of the previous version are writeable.
logger.debug(f"Set {IndexBlockType.NONE} block on source_index={source_index}.")
logger.debug(f"Set {IndexBlockType.NONE.value} block on source_index={source_index}.")
es_mgr.set_index_block(index_name=source_index
, block_name=IndexBlockType.NONE)
logger.debug(f"Set {IndexBlockType.NONE} block on flush_index={flush_index}.")
, block_type_enum=IndexBlockType.NONE)
logger.debug(f"Set {IndexBlockType.NONE.value} block on flush_index={flush_index}.")
es_mgr.set_index_block(index_name=flush_index
, block_name=IndexBlockType.NONE)
, block_type_enum=IndexBlockType.NONE)
else:
logger.error(f"Unable to 'rename' indices for fill_strategy={fill_strategy}")

Expand Down
113 changes: 93 additions & 20 deletions scripts/fresh_indices/fresh_indices.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,16 +62,45 @@ Help()
################################################################################
# Verify the needs of this script are available, the version is acceptable, etc.
################################################################################
StartupVerifications()
{
# No version requirement for Python 3, but don't expect it to report
# a version if it is unavailable
if ! python3 --version | grep '^Python 3.[0-9]' > /dev/null; then
bail_out_errors+=("Python 3 does not seem to be available")
elif [[ "$arg_verbose" == true ]]; then
echo Python 3 found - `python3 --version`
StartupVerifications() {
# Check Python version >= 3.13 using Python itself, capturing
# output of heredoc to python_output variable.
python_output="$(
python3.13 - << 'EOF'
import sys

required = (3, 13)
current = sys.version_info

if current >= required:
# Print exact found version on success for the Bash wrapper to capture
print(f"OK {current.major}.{current.minor}.{current.micro}")
raise SystemExit(0)

print(
f"Python ≥ {required[0]}.{required[1]} is required, "
f"but found {current.major}.{current.minor}.{current.micro}"
)
raise SystemExit(2)
EOF
)"

status=$?

if [[ $status -eq 0 ]]; then
# If verbose, print the discovered version
if [[ "$arg_verbose" == true ]]; then
# Extract version after "OK "
python_version=${python_output#"OK "}
echo "Python 3 found – $python_version"
fi
else
# Append failure message to the global error array
bail_out_errors+=("$python_output")
fi

# Make sure an admin group token has been placed in a file so
# it can be passed in on the Python command line.
if [[ ! -f "./token_holder" ]]; then
bail_out_errors+=("The file 'token_holder' is not found in `pwd`")
fi
Expand All @@ -87,17 +116,6 @@ printf -v date_stamp '%(%Y-%m-%d)T' -1
# Commands accepted in the script arguments after the options, as described in Help()
recognized_commands=("create","catch-up","go-live")

# Pull the names of the destination indices from the same YAML which will be
# used for reindexing.
readarray -t entities_portal_indices < <(
python -c 'import yaml,sys; \
y=yaml.safe_load(sys.stdin); \
print(y["indices"]["entities"]["public"]); \
print(y["indices"]["entities"]["private"]); \
print(y["indices"]["portal"]["public"]); \
print(y["indices"]["portal"]["private"])' < ../../src/instance/search-config.yaml
)

################################################################################
# Set internal variables used by this script
################################################################################
Expand Down Expand Up @@ -146,9 +164,64 @@ else
esac
fi

LoadEntitiesPortableIndices() {

  # Path of the YAML configuration file naming the indices to load.
  local cfg_file="$1"

  # Start from an empty result array on every invocation.
  entities_portal_indices=()

  # Run the embedded Python reader; fold stderr into stdout so any
  # failure text from Python is captured for error reporting below.
  python_output="$(
python3.13 - "$cfg_file" << 'EOF' 2>&1
import yaml, sys

try:
    with open(sys.argv[1]) as f:
        y = yaml.safe_load(f)
except Exception:
    print(f"Unable to find configuration file: {sys.argv[1]}")
    raise SystemExit(2)

try:
    print(y["indices"]["entities"]["public"])
    print(y["indices"]["entities"]["private"])
    print(y["indices"]["portal"]["public"])
    print(y["indices"]["portal"]["private"])
except KeyError as ke:
    raise SystemExit(f"Missing key in {sys.argv[1]}: {ke}")
EOF
)"

  local status=$?

  # Failure: hand the entire captured Python output to the caller's
  # global error list and propagate the Python exit status.
  if [[ $status -ne 0 ]]; then
    bail_out_errors+=("$python_output")
    return $status
  fi

  # Success: one index name per output line -> one array element each.
  readarray -t entities_portal_indices <<< "$python_output"

  if [[ "$arg_verbose" == true ]]; then
    echo "Loaded indices from: $cfg_file"
    for index in "${entities_portal_indices[@]%,}"; do
      printf "\t%s\n" "$index"
    done
  fi

  return 0
}

# Verify resources this script needs are available.
StartupVerifications

# Load the indices names from the YAML file for the project
config_file="../../src/instance/search-config.yaml"
LoadEntitiesPortableIndices "$config_file"

# Verify the specified output directory is writeable.
if [ ! -w $arg_output_dir ]; then
bail_out_errors+=("Unable to write files to '${arg_output_dir}'.")
Expand Down Expand Up @@ -182,6 +255,6 @@ else
echo "Unexpectedly tried to execute with cmd='$cmd'"
fi
MYPYPATH=../../src:../../src/search-adaptor/src:../../src/search-adaptor/src/libs:../../src/search-adaptor/src/translator
PYTHONPATH=$MYPYPATH python3 fresh_indices.py $cmd `cat ./token_holder`
PYTHONPATH=$MYPYPATH python3.13 fresh_indices.py $cmd `cat ./token_holder`

exit $EXIT_SUCCESS
Loading
Loading