Skip to content

Commit afa738b

Browse files
committed
Merge commit 'a3686deab294cb1baa8c544024a5d65dcad90846'
2 parents 528cddc + a3686de commit afa738b

File tree

79 files changed

+829
-334
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+829
-334
lines changed

lib/mmseqs/.cirrus.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ env:
44
task:
55
name: FreeBSD-13
66
freebsd_instance:
7-
image_family: freebsd-13-0
7+
image_family: freebsd-13-2-snap
88
install_script: pkg install -y cmake git samtools
99
compile_script: |
1010
mkdir build && cd build

lib/mmseqs/cmake/MMseqsSetupDerivedTarget.cmake

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
include(AppendTargetProperty)
22

33
function (mmseqs_setup_derived_target TARGET)
4-
get_target_property(COMPILE_TMP mmseqs-framework COMPILE_FLAGS)
5-
get_target_property(LINK_TMP mmseqs-framework LINK_FLAGS)
6-
get_target_property(DEF_TMP mmseqs-framework COMPILE_DEFINITIONS)
7-
get_target_property(INCL_TMP mmseqs-framework INCLUDE_DIRECTORIES)
4+
set(SOURCE "${ARGN}")
5+
if(NOT SOURCE)
6+
set(SOURCE "mmseqs-framework")
7+
endif()
8+
get_target_property(COMPILE_TMP ${SOURCE} COMPILE_FLAGS)
9+
get_target_property(LINK_TMP ${SOURCE} LINK_FLAGS)
10+
get_target_property(DEF_TMP ${SOURCE} COMPILE_DEFINITIONS)
11+
get_target_property(INCL_TMP ${SOURCE} INCLUDE_DIRECTORIES)
812

9-
target_link_libraries(${TARGET} mmseqs-framework)
13+
target_link_libraries(${TARGET} ${SOURCE})
1014
append_target_property(${TARGET} COMPILE_FLAGS ${COMPILE_TMP})
1115
append_target_property(${TARGET} LINK_FLAGS ${LINK_TMP})
1216
set_property(TARGET ${TARGET} APPEND PROPERTY COMPILE_DEFINITIONS ${DEF_TMP})

lib/mmseqs/data/workflow/blastp.sh

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,33 @@ fail() {
55
exit 1
66
}
77

8+
abspath() {
9+
if [ -d "$1" ]; then
10+
(cd "$1"; pwd)
11+
elif [ -f "$1" ]; then
12+
if [ -z "${1##*/*}" ]; then
13+
echo "$(cd "${1%/*}"; pwd)/${1##*/}"
14+
else
15+
echo "$(pwd)/$1"
16+
fi
17+
elif [ -d "$(dirname "$1")" ]; then
18+
echo "$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
19+
fi
20+
}
21+
22+
fake_pref() {
23+
QDB="$1"
24+
TDB="$2"
25+
RES="$3"
26+
# create link to data file which contains a list of all targets that should be aligned
27+
ln -s "$(abspath "${TDB}.index")" "${RES}"
28+
# create new index repeatedly pointing to same entry
29+
INDEX_SIZE="$(wc -c < "${TDB}.index")"
30+
awk -v size="$INDEX_SIZE" '{ print $1"\t0\t"size; }' "${QDB}.index" > "${RES}.index"
31+
# create dbtype (7)
32+
awk 'BEGIN { printf("%c%c%c%c",7,0,0,0); exit; }' > "${RES}.dbtype"
33+
}
34+
835
notExists() {
936
[ ! -f "$1" ]
1037
}
@@ -27,14 +54,23 @@ ALN_RES_MERGE="$TMP_PATH/aln_0"
2754
while [ "$STEP" -lt "$STEPS" ]; do
2855
SENS_PARAM=SENSE_${STEP}
2956
eval SENS="\$$SENS_PARAM"
30-
# call prefilter module
57+
58+
# 1. Prefilter hits
3159
if notExists "$TMP_PATH/pref_$STEP.dbtype"; then
32-
# shellcheck disable=SC2086
33-
$RUNNER "$MMSEQS" prefilter "$INPUT" "$TARGET" "$TMP_PATH/pref_$STEP" $PREFILTER_PAR -s "$SENS" \
34-
|| fail "Prefilter died"
60+
if [ "$PREFMODE" = "EXHAUSTIVE" ]; then
61+
fake_pref "${INPUT}" "${TARGET}" "$TMP_PATH/pref_$STEP"
62+
elif [ "$PREFMODE" = "UNGAPPED" ]; then
63+
# shellcheck disable=SC2086
64+
$RUNNER "$MMSEQS" ungappedprefilter "$INPUT" "$TARGET" "$TMP_PATH/pref_$STEP" $UNGAPPEDPREFILTER_PAR \
65+
|| fail "Ungapped prefilter died"
66+
else
67+
# shellcheck disable=SC2086
68+
$RUNNER "$MMSEQS" prefilter "$INPUT" "$TARGET" "$TMP_PATH/pref_$STEP" $PREFILTER_PAR -s "$SENS" \
69+
|| fail "Prefilter died"
70+
fi
3571
fi
3672

37-
# call alignment module
73+
# 2. alignment module
3874
if [ "$STEPS" -eq 1 ]; then
3975
if notExists "$3.dbtype"; then
4076
# shellcheck disable=SC2086

lib/mmseqs/data/workflow/blastpgp.sh

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,33 @@ fail() {
55
exit 1
66
}
77

8+
abspath() {
9+
if [ -d "$1" ]; then
10+
(cd "$1"; pwd)
11+
elif [ -f "$1" ]; then
12+
if [ -z "${1##*/*}" ]; then
13+
echo "$(cd "${1%/*}"; pwd)/${1##*/}"
14+
else
15+
echo "$(pwd)/$1"
16+
fi
17+
elif [ -d "$(dirname "$1")" ]; then
18+
echo "$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
19+
fi
20+
}
21+
22+
fake_pref() {
23+
QDB="$1"
24+
TDB="$2"
25+
RES="$3"
26+
# create link to data file which contains a list of all targets that should be aligned
27+
ln -s "$(abspath "${TDB}.index")" "${RES}"
28+
# create new index repeatedly pointing to same entry
29+
INDEX_SIZE="$(wc -c < "${TDB}.index")"
30+
awk -v size="$INDEX_SIZE" '{ print $1"\t0\t"size; }' "${QDB}.index" > "${RES}.index"
31+
# create dbtype (7)
32+
awk 'BEGIN { printf("%c%c%c%c",7,0,0,0); exit; }' > "${RES}.dbtype"
33+
}
34+
835
notExists() {
936
[ ! -f "$1" ]
1037
}
@@ -28,15 +55,26 @@ STEP=0
2855
while [ "$STEP" -lt "$NUM_IT" ]; do
2956
# call prefilter module
3057
if notExists "$TMP_PATH/pref_tmp_${STEP}.done"; then
31-
PARAM="PREFILTER_PAR_$STEP"
32-
eval TMP="\$$PARAM"
58+
if [ "$PREFMODE" = "EXHAUSTIVE" ]; then
59+
TMP=""
60+
PREF="fake_pref"
61+
elif [ "$PREFMODE" = "UNGAPPED" ]; then
62+
PARAM="UNGAPPEDPREFILTER_PAR_$STEP"
63+
eval TMP="\$$PARAM"
64+
PREF="${MMSEQS} ungappedprefilter"
65+
else
66+
PARAM="PREFILTER_PAR_$STEP"
67+
eval TMP="\$$PARAM"
68+
PREF="${MMSEQS} prefilter"
69+
fi
70+
3371
if [ "$STEP" -eq 0 ]; then
3472
# shellcheck disable=SC2086
35-
$RUNNER "$MMSEQS" prefilter "$QUERYDB" "$2" "$TMP_PATH/pref_$STEP" ${TMP} \
73+
$RUNNER $PREF "$QUERYDB" "$2" "$TMP_PATH/pref_$STEP" ${TMP} \
3674
|| fail "Prefilter died"
3775
else
3876
# shellcheck disable=SC2086
39-
$RUNNER "$MMSEQS" prefilter "$QUERYDB" "$2" "$TMP_PATH/pref_tmp_$STEP" ${TMP} \
77+
$RUNNER $PREF "$QUERYDB" "$2" "$TMP_PATH/pref_tmp_$STEP" ${TMP} \
4078
|| fail "Prefilter died"
4179
fi
4280
touch "$TMP_PATH/pref_tmp_${STEP}.done"

lib/mmseqs/data/workflow/createtaxdb.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ if { [ "${DBMODE}" = "1" ] && notExists "${TAXDBNAME}_taxonomy"; } || { [ "${DBM
5959
# Download NCBI taxon information
6060
if notExists "${TMP_PATH}/ncbi_download.complete"; then
6161
echo "Download taxdump.tar.gz"
62-
downloadFile "https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz" "${TMP_PATH}/taxdump.tar.gz"
62+
downloadFile "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz" "${TMP_PATH}/taxdump.tar.gz"
6363
tar -C "${TMP_PATH}" -xzf "${TMP_PATH}/taxdump.tar.gz" names.dmp nodes.dmp merged.dmp delnodes.dmp
6464
touch "${TMP_PATH}/ncbi_download.complete"
6565
rm -f "${TMP_PATH}/taxdump.tar.gz"

lib/mmseqs/data/workflow/databases.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,9 @@ case "${SELECTION}" in
118118
if notExists "${TMP_PATH}/nr.gz"; then
119119
date "+%s" > "${TMP_PATH}/version"
120120
downloadFile "https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz" "${TMP_PATH}/nr.gz"
121-
downloadFile "https://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/prot.accession2taxid.gz" "${TMP_PATH}/prot.accession2taxid.gz"
121+
downloadFile "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/prot.accession2taxid.gz" "${TMP_PATH}/prot.accession2taxid.gz"
122122
gunzip "${TMP_PATH}/prot.accession2taxid.gz"
123-
downloadFile "https://ftp.ncbi.nih.gov/pub/taxonomy/accession2taxid/pdb.accession2taxid.gz" "${TMP_PATH}/pdb.accession2taxid.gz"
123+
downloadFile "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/pdb.accession2taxid.gz" "${TMP_PATH}/pdb.accession2taxid.gz"
124124
gunzip "${TMP_PATH}/pdb.accession2taxid.gz"
125125
fi
126126
push_back "${TMP_PATH}/nr.gz"
@@ -147,7 +147,7 @@ case "${SELECTION}" in
147147
"PDB")
148148
if notExists "${TMP_PATH}/pdb_seqres.txt.gz"; then
149149
date "+%s" > "${TMP_PATH}/version"
150-
downloadFile "https://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt.gz" "${TMP_PATH}/pdb_seqres.txt.gz"
150+
downloadFile "https://files.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt.gz" "${TMP_PATH}/pdb_seqres.txt.gz"
151151
fi
152152
push_back "${TMP_PATH}/pdb_seqres.txt.gz"
153153
INPUT_TYPE="FASTA_LIST"
@@ -212,8 +212,8 @@ case "${SELECTION}" in
212212
;;
213213
"CDD")
214214
if notExists "${TMP_PATH}/msa.msa.gz"; then
215-
downloadFile "https://ftp.ncbi.nih.gov/pub/mmdb/cdd/cdd.info" "${TMP_PATH}/version"
216-
downloadFile "https://ftp.ncbi.nih.gov/pub/mmdb/cdd/fasta.tar.gz" "${TMP_PATH}/msa.tar.gz"
215+
downloadFile "https://ftp.ncbi.nlm.nih.gov/pub/mmdb/cdd/cdd.info" "${TMP_PATH}/version"
216+
downloadFile "https://ftp.ncbi.nlm.nih.gov/pub/mmdb/cdd/fasta.tar.gz" "${TMP_PATH}/msa.tar.gz"
217217
fi
218218
INPUT_TYPE="FASTA_MSA"
219219
SED_FIX_LOOKUP='s|\.FASTA||g'

lib/mmseqs/data/workflow/taxpercontig.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,11 @@ if [ -n "${ORF_FILTER}" ]; then
4545
fi
4646

4747
if notExists "${TMP_PATH}/orfs_aln.list"; then
48-
awk '$3 > 1 { print $1 }' "${TMP_PATH}/orfs_aln.index" > "${TMP_PATH}/orfs_aln.list"
48+
# shellcheck disable=SC2086
49+
"$MMSEQS" recoverlongestorf "${ORFS_DB}" "${TMP_PATH}/orfs_aln" "${TMP_PATH}/orfs_aln_recovered.list" ${THREADS_PAR} \
50+
|| fail "recoverlongestorf died"
51+
awk '$3 > 1 { print $1 }' "${TMP_PATH}/orfs_aln.index" > "${TMP_PATH}/orfs_aln_remain.list"
52+
cat "${TMP_PATH}/orfs_aln_recovered.list" "${TMP_PATH}/orfs_aln_remain.list" > "${TMP_PATH}/orfs_aln.list"
4953
fi
5054

5155
if notExists "${TMP_PATH}/orfs_filter.dbtype"; then

lib/mmseqs/data/workflow/tsv2exprofiledb.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,19 @@ fi
2222

2323
if notExists "${OUT}.dbtype"; then
2424
"$MMSEQS" tsv2db "${IN}.tsv" "${OUT}_tmp" --output-dbtype 0 ${VERBOSITY}
25-
MMSEQS_FOCE_MERGE=1 "$MMSEQS" compress "${OUT}_tmp" "${OUT}" ${VERBOSITY}
25+
MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_tmp" "${OUT}" ${VERBOSITY}
2626
"$MMSEQS" rmdb "${OUT}_tmp" ${VERBOSITY}
2727
fi
2828

2929
if notExists "${OUT}_seq.dbtype"; then
3030
"$MMSEQS" tsv2db "${IN}_seq.tsv" "${OUT}_seq_tmp" --output-dbtype 0 ${VERBOSITY}
31-
MMSEQS_FOCE_MERGE=1 "$MMSEQS" compress "${OUT}_seq_tmp" "${OUT}_seq" ${VERBOSITY}
31+
MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_seq_tmp" "${OUT}_seq" ${VERBOSITY}
3232
"$MMSEQS" rmdb "${OUT}_seq_tmp" ${VERBOSITY}
3333
fi
3434

3535
if notExists "${OUT}_aln.dbtype"; then
3636
"$MMSEQS" tsv2db "${IN}_aln.tsv" "${OUT}_aln_tmp" --output-dbtype 5 ${VERBOSITY}
37-
MMSEQS_FOCE_MERGE=1 "$MMSEQS" compress "${OUT}_aln_tmp" "${OUT}_aln" ${VERBOSITY}
37+
MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_aln_tmp" "${OUT}_aln" ${VERBOSITY}
3838
"$MMSEQS" rmdb "${OUT}_aln_tmp" ${VERBOSITY}
3939
fi
4040

lib/mmseqs/lib/alp/sls_pvalues.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ namespace Sls {
258258
return rand_C;
259259
};
260260

261-
static inline double standard_normal()//generates standard normal random value using the BoxMuller transform
261+
static inline double standard_normal()//generates standard normal random value using the Box-Muller transform
262262
{
263263
double r1=0;
264264
while(r1==0)

lib/mmseqs/lib/ksw2/kseq.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,10 @@ typedef struct __kstring_t {
113113
if (ks->end == -1) { ks->is_eof = 1; return -3; } \
114114
} else break; \
115115
} \
116-
if (delimiter == KS_SEP_LINE) { \
117-
for (i = ks->begin; i < ks->end; ++i) \
118-
if (ks->buf[i] == '\n') { ks->newline+=(append == 1); break; } \
116+
if (delimiter == KS_SEP_LINE) { \
117+
unsigned char *sep = (unsigned char*)memchr(ks->buf + ks->begin, '\n', ks->end - ks->begin); \
118+
i = sep != NULL ? sep - (unsigned char*)ks->buf : ks->end; \
119+
ks->newline += (sep != NULL && append == 1); \
119120
} else if (delimiter > KS_SEP_MAX) { \
120121
for (i = ks->begin; i < ks->end; ++i) \
121122
if (ks->buf[i] == delimiter) break; \

0 commit comments

Comments
 (0)