Skip to content

Commit 4e530de

Browse files
committed
change keep markdup reads in bam file to don't markdup
1 parent c4ef7ca commit 4e530de

File tree

6 files changed

+31
-21
lines changed

6 files changed

+31
-21
lines changed

README.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,19 @@
22

33
Developed by BioTuring (www.bioturing.com), <i><b>mdup</b></i> is a tool that preprocesses cloud-read data (reads with barcodes). <i><b>mdup</b></i> does the following:
44

5-
- Markdup PCR duplication, remove not primary reads, duplicate reads, secondary alignment, unmapped reads.
5+
- Remove duplicate reads, non-primary reads, secondary alignments, and unmapped reads.
66
- Detect molecules by clustering reads that have the same barcode into groups.
77
- Get stats about sequencing and GEM performance.
88

9+
Two reads are considered duplicates if they share the same mapped position, mapped target, CIGAR,
10+
and mate info (if paired-end).
11+
912
## Install
1013

1114
```shell
1215
git clone https://github.com/bioturing/mdup.git
1316
cd mdup
14-
sh build.sh
17+
bash build.sh
1518
```
1619

1720
## Usage
@@ -32,7 +35,7 @@ Optional arguments:
3235
-t INT number of threads [default: 1]
3336
-o DIR output directory [default: "./mdup_out/"]
3437
-g FILE reference file that generated bam file (for better stats)
35-
-k keep all record from BAM file, turn on duplicate bit flag instead.
38+
-k don't mark duplicate.
3639
```
3740

3841
# Contacts

argument.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ void print_usage()
1616
__VERBOSE(" -t INT number of threads [4]\n");
1717
__VERBOSE(" -o DIR output directory [\"./mdup_out/\"]\n");
1818
__VERBOSE(" -g FILE reference file that generated bam file (better statictis)\n");
19-
__VERBOSE(" -k keep all record from BAM file, turn on duplicate bit flag instead.\n");
19+
__VERBOSE(" -k don't mark duplicate\n");
2020
__VERBOSE("\n");
2121
__VERBOSE("This tool will generate some file in output directory:\n");
2222
__VERBOSE(" output.bam BAM file after processed\n");
@@ -33,7 +33,7 @@ void get_args(int argc, char *argv[])
3333
args.is_remove = true;
3434
args.reference = NULL;
3535

36-
if (argc < 3) {
36+
if (argc < 2) {
3737
print_usage();
3838
exit(0);
3939
}

argument.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
#include "attr.h"
55

6-
#define VERSION "1.2"
6+
#define VERSION "1.3"
77

88
struct prog_args {
99
int n_thread;

bam.c

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@ void read_bam_unmapped(struct bam_inf_t *bam_inf, struct stats_t *stats)
3737
__ERROR("Unmapped read doesn't have not primary flag or suplementary flag!");
3838
}
3939
get_basic_stats(b, stats);
40-
if (!args.is_remove)
41-
sam_write1(out_bam_f, bam_inf->b_hdr, b);
4240
}
4341

4442
bam_destroy1(b);
@@ -71,8 +69,6 @@ void read_bam_target(struct bam_inf_t *bam_inf, int id, struct stats_t *stats)
7169
if (b->core.flag & (FLAG_NOT_PRI | FLAG_SUPPLEMENT)) {
7270
duplicate_try_process(b->core.pos, stats,
7371
out_bam_f, bam_inf->b_hdr);
74-
if (!args.is_remove)
75-
sam_write1(out_bam_f, bam_inf->b_hdr, b);
7672
continue;
7773
}
7874

@@ -82,8 +78,6 @@ void read_bam_target(struct bam_inf_t *bam_inf, int id, struct stats_t *stats)
8278
if (b->core.n_cigar == 0) {
8379
duplicate_try_process(b->core.pos, stats,
8480
out_bam_f, bam_inf->b_hdr);
85-
if (!args.is_remove)
86-
sam_write1(out_bam_f, bam_inf->b_hdr, b);
8781
continue;
8882
}
8983

build.sh

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,24 @@
11
#!/bin/bash
22

3-
git submodule init && git submodule update
3+
RED="\033[0;31m\033[1m"
4+
NC="\033[0m" # No Color
45

5-
dir="$(pwd)/htslib/"
6+
if [ "$BASH_VERSION" = '' ]; then
7+
printf "${RED}Please run by command: bash build.sh${NC}\n"
8+
exit 1
9+
fi
610

7-
cd htslib && autoheader && autoconf
8-
9-
./configure --prefix=$dir --disable-lzma --disable-libcurl --disable-bz2
11+
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
12+
cd ${DIR}
13+
git submodule update --init --recursive || exit 1
1014

11-
make -j 4 && make install && cd ../
15+
# build htslib
16+
DIR="$(pwd)/htslib/"
17+
cd htslib && autoheader && autoconf
18+
./configure --prefix=${DIR} --disable-lzma --disable-libcurl --disable-bz2 || exit 1
19+
make || exit 1
20+
make install || exit 1
21+
cd ../
1222

13-
make
23+
# build mdup
24+
make || exit 1

duplicate.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,11 @@ void duplicate_process(struct stats_t *stats, samFile *out_bam_f, bam_hdr_t *b_h
7878
for (i = u; i < v; ++i) {
7979
if (i == pos)
8080
continue;
81-
align[i].b->core.flag |= FLAG_DUPLICATE;
82-
if (!args.is_remove)
81+
if (!args.is_remove) {
8382
sam_write1(out_bam_f, b_hdr, align[i].b);
83+
coverage_add(align[i].b, stats);
84+
mlc_insert(align[i].bx_id, align[i].b, stats);
85+
}
8486
}
8587

8688
sam_write1(out_bam_f, b_hdr, align[pos].b);

0 commit comments

Comments
 (0)