Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
33 changes: 33 additions & 0 deletions common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,36 @@ ensure_dir() {
mkdir -p "$1"
}
export -f ensure_dir

# Incrementally updates a hash listing file using XXH3.
# Only re-hashes files newer than the listing; handles additions and deletions.
#
# Listing format: first line is an algo tag comment, remaining lines are
# "hash  path" as produced by xxhsum, sorted by path (field 2).
#
# Limitation: parsing is whitespace-field based, so file paths containing
# spaces or newlines are not supported.
#
# Usage: make_file_listing <listing> <find_dir> [extra_find_args...]
make_file_listing() {
  local listing="$1" find_dir="$2"
  shift 2
  # Remaining args ("$@") are passed straight to find; using "$@" directly
  # (instead of copying into an array) stays safe under `set -u` on all
  # bash versions when no extra args are given.
  local algo="-H3"              # xxhsum flag selecting XXH3
  local algo_tag="# algo=xxh3"  # header line identifying the listing format
  local tmp_new tmp_cur
  tmp_new=$(mktemp)
  tmp_cur=$(mktemp)

  # Full rebuild when the listing is missing or was made with a different algo.
  if [[ ! -f "$listing" ]] || ! head -1 "$listing" | grep -qF "$algo_tag"; then
    find "$find_dir" -type f "$@" -exec xxhsum "$algo" {} + | sort -k2,2 >"$tmp_new"
    { echo "$algo_tag"; cat "$tmp_new"; } >"${listing}.tmp"
    # Atomic replace so a crash mid-write never leaves a truncated listing.
    mv "${listing}.tmp" "$listing"
    rm -f "$tmp_new" "$tmp_cur"
    return 0
  fi

  # Hash only files modified since the listing was last written.
  find "$find_dir" -type f "$@" -newer "$listing" -exec xxhsum "$algo" {} + >"$tmp_new"
  # Current set of files, used to prune entries for deleted files.
  find "$find_dir" -type f "$@" | sort >"$tmp_cur"

  # Merge: (old entries − re-hashed − deleted) + new hashes, re-sorted by path.
  # The first-file guard is FILENAME==ARGV[1], NOT the classic NR==FNR trick:
  # with an empty first file NR==FNR also holds for every line of the second
  # input, which would swallow the whole listing whenever nothing changed.
  tail -n +2 "$listing" |
    awk 'FILENAME==ARGV[1]{skip[$2]=1; next} !($2 in skip)' "$tmp_new" - |
    awk 'FILENAME==ARGV[1]{exists[$1]=1; next} ($2 in exists)' "$tmp_cur" - |
    { cat; cat "$tmp_new"; } |
    sort -k2,2 >"${listing}.tmp"
  { echo "$algo_tag"; cat "${listing}.tmp"; } >"$tmp_new"
  # Atomic replace, matching the rebuild branch.
  mv "$tmp_new" "$listing"
  rm -f "${listing}.tmp" "$tmp_cur"
}
export -f make_file_listing
4 changes: 3 additions & 1 deletion genark2jbrowse/addNcbiGffAndTextIndex.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/bash

set -euo pipefail

source "$(dirname "$0")/common.sh"

# Define function to add a GFF track to a JBrowse 2 assembly and create a text index.
Expand Down Expand Up @@ -28,7 +30,7 @@ add_track_and_text_index() {

jbrowse add-track --force "$gff_file_path" --out "$hub_dir" --load copy --indexFile "${gff_file_path}".csi --trackId "${accession}-ncbiGff" --name "NCBI RefSeq - RefSeq All (GFF)" --category "Genes and Gene Predictions" >/dev/null
# Check if trix folder exists
if [ -d "$hub_dir/trix" ] && [ -z "$REDOWNLOAD" ] && [ -z "$REPROCESS" ] && [ -z "$REPROCESS_TRIX" ]; then
if [ -d "$hub_dir/trix" ] && [ -z "${REDOWNLOAD:-}" ] && [ -z "${REPROCESS:-}" ] && [ -z "${REPROCESS_TRIX:-}" ]; then
add_trix_adapter "$accession" "$config_file"
else
echo "Trix folder does not exist for $accession, running jbrowse text-index"
Expand Down
10 changes: 6 additions & 4 deletions genark2jbrowse/cleanupStaleGff.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@
# ./cleanupStaleGff.sh --exec # actually delete the files
#

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ALL_JSON="$SCRIPT_DIR/processedHubJson/all.json"
LOG_FILE="$SCRIPT_DIR/CLEANED.md"
DRY_RUN=true

if [ "${1}" = "--exec" ]; then
if [ "${1:-}" = "--exec" ]; then
DRY_RUN=false
fi

Expand Down Expand Up @@ -57,7 +59,7 @@ fi

# Remove leftover uncompressed .gff files in bgz/
echo "=== Leftover uncompressed .gff files in bgz/ ==="
! $DRY_RUN && log "## Leftover uncompressed .gff files in bgz/"
if ! $DRY_RUN; then log "## Leftover uncompressed .gff files in bgz/"; fi
for f in "$SCRIPT_DIR/bgz/"*.gff; do
[ -f "$f" ] || continue
delete_file "$f"
Expand All @@ -66,7 +68,7 @@ done
# Remove .csi files in bgz/ with no corresponding .gz
echo ""
echo "=== Orphaned .csi files in bgz/ ==="
! $DRY_RUN && log "" && log "## Orphaned .csi files in bgz/"
if ! $DRY_RUN; then log ""; log "## Orphaned .csi files in bgz/"; fi
for f in "$SCRIPT_DIR/bgz/"GC[FA]_*.gz.csi; do
[ -f "$f" ] || continue
if [ ! -f "${f%.csi}" ]; then
Expand All @@ -79,7 +81,7 @@ done
for dir in gff bgz; do
echo ""
echo "=== GFF files not in listing ($dir/) ==="
! $DRY_RUN && log "" && log "## GFF files not in listing ($dir/)"
if ! $DRY_RUN; then log ""; log "## GFF files not in listing ($dir/)"; fi

for f in "$SCRIPT_DIR/$dir/"GC[FA]_*.gz; do
[ -f "$f" ] || continue
Expand Down
7 changes: 7 additions & 0 deletions genark2jbrowse/createChainTrackPifs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,11 @@ process_chain_file() {
process_liftover() {
local liftover_dir="$CONFIG_DIR/liftOver"
mkdir -p "$liftover_dir"
local stamp="$liftover_dir/.checked"

if [[ -f "$stamp" ]]; then
return 0
fi

local base_url="$HUB_URL/liftOver/"

Expand All @@ -189,6 +194,7 @@ process_liftover() {

if [[ -z "$urls" ]]; then
# No liftOver files found, which is normal for many assemblies
touch "$stamp"
return 0
fi

Expand All @@ -197,6 +203,7 @@ process_liftover() {
filename=$(basename "$url")
process_chain_file "$url" "$filename" "$liftover_dir"
done
touch "$stamp"
}

# --- Main Script ---
Expand Down
2 changes: 2 additions & 0 deletions genark2jbrowse/downloadNcbiGff.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/bash

set -euo pipefail

source "$(dirname "$0")/common.sh"

echo "Phase 1: Building queue of GFF files to download..."
Expand Down
2 changes: 2 additions & 0 deletions genark2jbrowse/fetchNcbiMetadata.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/bash

set -euo pipefail

source "$(dirname "$0")/common.sh"

echo "Phase 1: Building queue of assemblies that need NCBI metadata..."
Expand Down
Loading
Loading