@@ -148,10 +148,11 @@ def assign_fastq(file, gene, index, assign_dict):
148148 for line in f :
149149 line = line .strip ()
150150 if i % 4 == 0 :
151- if re .search ('/1' ,line ) or re .search ('/2' ,line ):
152- read_name = line .split ()[0 ][1 :- 2 ]
151+ raw_read_name = line .split ()[0 ]
152+ if re .search ('/1' ,raw_read_name ) or re .search ('/2' ,raw_read_name ):
153+ read_name = raw_read_name [1 :- 2 ]
153154 else :
154- read_name = line . split ()[ 0 ] [1 :]
155+ read_name = raw_read_name [1 :]
155156 if read_name in assign_dict .keys () and assign_dict [read_name ] == gene :
156157 flag = True
157158 num = 1
@@ -187,15 +188,24 @@ def main():
187188 t1 = time .time ()
188189 print ("read bam cost %s" % (t1 - t0 ))
189190
191+ count_read_for_each_gene = {}
190192 # assign genes for each read
191193 for read_name in read_dict :
192194 assigned_locus = read_dict [read_name ].assign ()
193195 if assigned_locus == 'REMOVE' :
194196 continue
195197 assign_dict [read_name ] = assigned_locus
196-
198+ if assigned_locus not in count_read_for_each_gene :
199+ count_read_for_each_gene [assigned_locus ] = 0
200+ count_read_for_each_gene [assigned_locus ] += 1
201+
197202 # generate gene-specific fastq
198203 for gene in ['A' , 'B' , 'C' , 'DPA1' , 'DPB1' , 'DQA1' , 'DQB1' , 'DRB1' ]:
204+ if gene in count_read_for_each_gene :
205+ read_num = count_read_for_each_gene [gene ]
206+ else :
207+ read_num = 0
208+ print (f"read no. for { gene } is { read_num } ." )
199209 assign_fastq (options .fq1 , gene , 1 , assign_dict )
200210 assign_fastq (options .fq2 , gene , 2 , assign_dict )
201211 t2 = time .time ()
0 commit comments