Skip to content

Commit 69d1454

Browse files
committed
Made some improvements to the uploading of proteins to OpenSearch
1 parent 144c4af commit 69d1454

1 file changed

Lines changed: 19 additions & 1 deletion

File tree

unipept-index/install.sh

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,26 @@ setup_database() {
138138
# Wait until OpenSearch has fully started
139139
timeout 90s bash -c 'until curl -s http://localhost:9200; do echo "Waiting for OpenSearch..."; sleep 5; done'
140140

141+
# Verify the UniProt entries file exists and is non-empty before starting the upload
142+
if [[ ! -f "$FEATURE_DIR/uniprot_entries.tsv.lz4" ]]; then
143+
error_exit "UniProt entries file not found: $FEATURE_DIR/uniprot_entries.tsv.lz4"
144+
fi
145+
if [[ ! -s "$FEATURE_DIR/uniprot_entries.tsv.lz4" ]]; then
146+
error_exit "UniProt entries file is empty: $FEATURE_DIR/uniprot_entries.tsv.lz4"
147+
fi
148+
echo "UniProt entries file size: $(du -sh "$FEATURE_DIR/uniprot_entries.tsv.lz4" | cut -f1)"
149+
141150
# Load data by executing the script from the unipept-database repo
142151
"${REPO_TMP_DIR}/unipept-database/scripts/initialize_opensearch.sh" --uniprot-entries "$FEATURE_DIR/uniprot_entries.tsv.lz4"
143152

153+
# Refresh the index so all uploaded documents are visible to search, then report the number of indexed proteins
154+
curl -s -X POST "http://localhost:9200/uniprot_entries/_refresh" > /dev/null
155+
protein_count=$(curl -s "http://localhost:9200/uniprot_entries/_count" | jq '.count')
156+
echo "Indexed protein count in OpenSearch: $protein_count"
157+
if [[ "$protein_count" -eq 0 ]]; then
158+
error_exit "No proteins were indexed. The upload likely failed silently."
159+
fi
160+
144161
# Gracefully stop OpenSearch now that data loading is complete
145162
echo "Stopping OpenSearch after data load..."
146163
kill "$OPENSEARCH_PID" || true
@@ -153,7 +170,8 @@ setup_database() {
153170
set -euo pipefail
154171
CONTAINER_USER="${_REMOTE_USER:-vscode}"
155172
echo "Starting OpenSearch..."
156-
sudo -u "$CONTAINER_USER" /usr/share/opensearch/bin/opensearch > /var/log/opensearch/startup.log 2>&1 &
173+
sudo -u "$CONTAINER_USER" nohup /usr/share/opensearch/bin/opensearch > /var/log/opensearch/startup.log 2>&1 &
174+
disown
157175
timeout 90s bash -c 'until curl -s http://localhost:9200; do echo "Waiting for OpenSearch..."; sleep 5; done'
158176
echo "OpenSearch is ready."
159177
EOF

0 commit comments

Comments
 (0)