@@ -29,35 +29,57 @@ static int read_bam(void *data, bam1_t *b) // read level filters better go here
2929 return ret ;
3030}
3131
32+ typedef struct {
33+ int32_t bin ;
34+ int32_t bin_idx ;
35+ int32_t tid ;
36+ int32_t bin_size ;
37+ } circos_t ;
38+
39+ static void circos_print (circos_t * circos , bam_header_t * h )
40+ {
41+ if (circos -> tid < 0 || 0 == circos -> bin ) return ;
42+ // NB: this could be faster with custom routines
43+ fputs (h -> target_name [circos -> tid ], stdout );
44+ printf ("\t%d\t%d\t%f\n" ,
45+ (circos -> bin_idx * circos -> bin_size ) + 1 ,
46+ (circos -> bin_idx + 1 ) * circos -> bin_size ,
47+ circos -> bin / (double )circos -> bin_size );
48+ }
49+
3250#ifdef _MAIN_BAM2DEPTH
3351int main (int argc , char * argv [])
3452#else
3553int main_depth (int argc , char * argv [])
3654#endif
3755{
38- int i , n , tid , beg , end , pos , * n_plp , baseQ = 0 , mapQ = 0 ;
56+ int i , n , tid , beg , end , pos , * n_plp , baseQ = 0 , mapQ = 0 , use_circos = 0 ;
3957 const bam_pileup1_t * * plp ;
4058 char * reg = 0 ; // specified region
4159 void * bed = 0 ; // BED data structure
4260 bam_header_t * h = 0 ; // BAM header of the 1st input
4361 aux_t * * data ;
4462 bam_mplp_t mplp ;
63+ circos_t circos ; circos .bin_size = 10000 ;
4564
4665 // parse the command line
47- while ((n = getopt (argc , argv , "r:b:q:Q:" )) >= 0 ) {
66+ while ((n = getopt (argc , argv , "r:b:q:Q:cB: " )) >= 0 ) {
4867 switch (n ) {
4968 case 'r' : reg = strdup (optarg ); break ; // parsing a region requires a BAM header
5069 case 'b' : bed = bed_read (optarg ); break ; // BED or position list file can be parsed now
5170 case 'q' : baseQ = atoi (optarg ); break ; // base quality threshold
5271 case 'Q' : mapQ = atoi (optarg ); break ; // mapping quality threshold
72+ case 'c' : use_circos = 1 ; break ; // circos output
73+ case 'B' : circos .bin_size = atoi (optarg ); break ; // circos bin size
5374 }
5475 }
5576 if (optind == argc ) {
56- fprintf (stderr , "Usage: bam2depth [-r reg] [-q baseQthres] [-Q mapQthres] [-b in.bed] <in1.bam> [...]\n" );
77+ fprintf (stderr , "Usage: depth [-r reg] [-q baseQthres] [-Q mapQthres] [-b in.bed] [-c [-B binSize] ] <in1.bam> [...]\n" );
5778 return 1 ;
5879 }
5980
6081 // initialize the auxiliary data structures
82+ if (use_circos ) circos .bin = circos .bin_idx = 0 ; circos .tid = -1 ;
6183 n = argc - optind ; // the number of BAMs on the command line
6284 data = calloc (n , sizeof (void * )); // data[i] for the i-th input
6385 beg = 0 ; end = 1 <<30 ; tid = -1 ; // set the default region
@@ -83,22 +105,39 @@ int main_depth(int argc, char *argv[])
83105 n_plp = calloc (n , sizeof (int )); // n_plp[i] is the number of covering reads from the i-th BAM
84106 plp = calloc (n , sizeof (void * )); // plp[i] points to the array of covering reads (internal in mplp)
85107 while (bam_mplp_auto (mplp , & tid , & pos , n_plp , plp ) > 0 ) { // come to the next covered position
108+ int32_t cov = 0 ;
86109 if (pos < beg || pos >= end ) continue ; // out of range; skip
87110 if (bed && bed_overlap (bed , h -> target_name [tid ], pos , pos + 1 ) == 0 ) continue ; // not in BED; skip
88- fputs (h -> target_name [tid ], stdout ); printf ("\t%d" , pos + 1 ); // a customized printf() would be faster
89- for (i = 0 ; i < n ; ++ i ) { // base level filters have to go here
90- int j , m = 0 ;
91- for (j = 0 ; j < n_plp [i ]; ++ j ) {
92- const bam_pileup1_t * p = plp [i ] + j ; // DON'T modfity plp[][] unless you really know
93- if (p -> is_del || p -> is_refskip ) ++ m ; // having dels or refskips at tid:pos
94- else if (bam1_qual (p -> b )[p -> qpos ] < baseQ ) ++ m ; // low base quality
95- }
96- printf ("\t%d" , n_plp [i ] - m ); // this the depth to output
97- }
98- putchar ('\n' );
111+ if (0 == use_circos ) { fputs (h -> target_name [tid ], stdout ); printf ("\t%d" , pos + 1 ); } // a customized printf() would be faster
112+ for (i = 0 ; i < n ; ++ i ) { // base level filters have to go here
113+ int j , m = 0 ;
114+ for (j = 0 ; j < n_plp [i ]; ++ j ) {
115+ const bam_pileup1_t * p = plp [i ] + j ; // DON'T modfity plp[][] unless you really know
116+ if (p -> is_del || p -> is_refskip ) ++ m ; // having dels or refskips at tid:pos
117+ else if (bam1_qual (p -> b )[p -> qpos ] < baseQ ) ++ m ; // low base quality
118+ }
119+ if (0 == use_circos ) printf ("\t%d" , n_plp [i ] - m ); // this the depth to output
120+ else cov += (n_plp [i ] - m );
121+ }
122+ if (0 == use_circos ) putchar ('\n' );
123+ else {
124+ pos ++ ; // make one-based
125+ int32_t bin_idx = ((pos - (pos % circos .bin_size )) / circos .bin_size );
126+ if (tid == circos .tid && bin_idx == circos .bin_idx ) {
127+ circos .bin += cov ; // this is the depth to output
128+ }
129+ else {
130+ circos_print (& circos , h ); // print
131+ // update
132+ circos .bin = cov ; // this is the depth to output
133+ circos .bin_idx = bin_idx ;
134+ circos .tid = tid ;
135+ }
136+ }
99137 }
100138 free (n_plp ); free (plp );
101139 bam_mplp_destroy (mplp );
140+ if (1 == use_circos ) circos_print (& circos , h ); // print
102141
103142 bam_header_destroy (h );
104143 for (i = 0 ; i < n ; ++ i ) {
0 commit comments