-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathbam.c
More file actions
107 lines (92 loc) · 2.99 KB
/
bam.c
File metadata and controls
107 lines (92 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#include "bam.h"
#include "argument.h"
#include "khash_bx.h"
#include "duplicate.h"
#include "molecule.h"
void bam_inf_init(struct bam_inf_t *bam_inf, const char *file_path)
{
bam_inf->bam_path = file_path;
bam_inf->cur_id = 0;
/* Open bam file and the index file */
samFile *bam_f;
if (!(bam_f = sam_open(file_path, "rb")))
__PERROR("Could not open BAM file");
if (!(bam_inf->bam_i = sam_index_load(bam_f, file_path)))
__ERROR("BAM file must be indexed!");
/* Init the header */
bam_inf->b_hdr = sam_hdr_read(bam_f);
sam_close(bam_f);
}
void read_bam_unmapped(struct bam_inf_t *bam_inf, struct stats_t *stats)
{
samFile *in_bam_f = sam_open(bam_inf->bam_path, "rb");
hts_itr_t *iter = sam_itr_queryi(bam_inf->bam_i, HTS_IDX_NOCOOR, 0, 0);
bam1_t *b = bam_init1();
char file_path[BUFSZ];
sprintf(file_path, "%s/unmapped.bam", args.out_dir);
samFile *out_bam_f = sam_open(file_path, "wb");
/* all bam record here is unmapped */
while (sam_itr_next(in_bam_f, iter, b) >= 0) {
if ((b->core.flag & (FLAG_NOT_PRI | FLAG_SUPPLEMENT)) != 0) {
__VERBOSE("\n");
__ERROR("Unmapped read doesn't have not primary flag or suplementary flag!");
}
get_basic_stats(b, stats);
}
bam_destroy1(b);
sam_itr_destroy(iter);
sam_close(in_bam_f);
sam_close(out_bam_f);
}
void read_bam_target(struct bam_inf_t *bam_inf, int id, struct stats_t *stats)
{
int target_len = bam_inf->b_hdr->target_len[id];
samFile *in_bam_f = sam_open(bam_inf->bam_path, "rb");
hts_itr_t *iter = sam_itr_queryi(bam_inf->bam_i, id, 1, target_len);
bam1_t *b = bam_init1();
int bx_map_cnt = 0;
khash_t(KHASH_STR) *khash_bx = kh_init(KHASH_STR);
char file_path[BUFSZ];
sprintf(file_path, "%s/temp.%s.bam", args.out_dir,
bam_inf->b_hdr->target_name[id]);
samFile *out_bam_f = sam_open(file_path, "wb");
coverage_init_target(id, bam_inf->b_hdr->target_len[id]);
int pos_fetch = 0;
while (sam_itr_next(in_bam_f, iter, b) >= 0) {
uint8_t *tag_data;
char *bar_s = NULL;
int bx_id = -1;
/* skip align is supplementary or not primary */
if (b->core.flag & (FLAG_NOT_PRI | FLAG_SUPPLEMENT)) {
duplicate_try_process(b->core.pos, stats,
out_bam_f, bam_inf->b_hdr);
continue;
}
get_basic_stats(b, stats);
/* only get basic stats for unmapped read */
if (b->core.n_cigar == 0) {
duplicate_try_process(b->core.pos, stats,
out_bam_f, bam_inf->b_hdr);
continue;
}
tag_data = bam_aux_get(b, "BX");
if (!tag_data)
__ERROR("Read doesn't have BX:Z: tag!");
bar_s = bam_aux2Z(tag_data);
bx_id = khash_bx_get_id(khash_bx, &bx_map_cnt, bar_s);
duplicate_insert(b, bx_id, stats, out_bam_f, bam_inf->b_hdr);
if (b->core.pos >= pos_fetch) {
while (pos_fetch < b->core.pos)
pos_fetch += MLC_CONS_THRES;
mlc_fetch(stats, b->core.pos);
}
}
duplicate_process(stats, out_bam_f, bam_inf->b_hdr);
coverage_get(stats, bam_inf->b_hdr->target_len[id]);
mlc_get_last(stats);
khash_bx_destroy(khash_bx);
sam_itr_destroy(iter);
bam_destroy1(b);
sam_close(in_bam_f);
sam_close(out_bam_f);
}