Skip to content

Commit 5cc013f

Browse files
Jonathan Manningiontorrent-dev
authored andcommitted
Add downsample to examples.
Signed-off-by: Nils Homer <[email protected]>
1 parent b3fa9e7 commit 5cc013f

File tree

1 file changed

+80
-0
lines changed

1 file changed

+80
-0
lines changed

examples/downsample.c

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#include <stdio.h>
2+
#include "sam.h"
3+
4+
struct fetchdata {
5+
float sample;
6+
long int total, filtered;
7+
};
8+
9+
// callback for bam_fetch()
10+
static int fetch_func(const bam1_t *b, void *data)
11+
{
12+
struct fetchdata *d = (struct fetchdata *) data;
13+
d->total++;
14+
15+
// Validate b against criteria
16+
uint32_t flag = b->core.flag;
17+
18+
// If read is not mapped or flaged as duplicate skip
19+
if(flag & BAM_FUNMAP || flag & BAM_FDUP) {
20+
return 0;
21+
}
22+
23+
// Use only primary alignments
24+
if(flag & BAM_FSECONDARY) {
25+
return 0; // Don't use secondary alignments
26+
}
27+
28+
d->filtered++;
29+
30+
if( ( (float)rand() / (float) RAND_MAX ) > d->sample ) return 0;
31+
32+
return 1;
33+
}
34+
35+
int main(int argc, char *argv[])
36+
{
37+
/* FIXME - stdin > stdout */
38+
if (argc != 3) {
39+
fprintf(stderr, "Usage: downsample <in.bam> <out.bam>\n");
40+
return 1;
41+
}
42+
43+
float sample = 0.50f;
44+
45+
samfile_t *in = samopen(argv[1], "rb", 0);
46+
if(in == 0) {
47+
fprintf(stderr, "Failed to open input BAM file %s\n", argv[1]);
48+
return 1;
49+
}
50+
51+
samfile_t *out;
52+
/* Provide bam_header_t from source file as header for output */
53+
out = samopen(argv[2], "wbh", in->header);
54+
if(out == 0) {
55+
fprintf(stderr, "Failed to open output file %s\n", argv[1]);
56+
return 1;
57+
}
58+
59+
/* NB: rand is seeded with constant - result should be reproducible */
60+
srand(42);
61+
62+
struct fetchdata data = { sample, 0, 0 };
63+
64+
bam1_t *b = bam_init1();
65+
unsigned long count = 0;
66+
while(samread(in, b) > 0) {
67+
if(fetch_func(b, &data) != 0) continue;
68+
69+
// Write to out
70+
samwrite(out, b);
71+
count++;
72+
}
73+
bam_destroy1(b);
74+
75+
fprintf(stderr, "Sampled %ld reads out of %ld total, %ld filtered [mapped, primary, non-duplicate]\n",count, data.total, data.filtered);
76+
77+
samclose(in);
78+
samclose(out);
79+
return 0;
80+
}

0 commit comments

Comments
 (0)