Skip to content

Commit c640296

Browse files
committed
fix the bug in read name recognition
1 parent 74f8e13 commit c640296

File tree

1 file changed

+14
-4
lines changed

1 file changed

+14
-4
lines changed

script/assign_reads_to_genes.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,10 +148,11 @@ def assign_fastq(file, gene, index, assign_dict):
148148
for line in f:
149149
line = line.strip()
150150
if i % 4 == 0:
151-
if re.search('/1',line) or re.search('/2',line):
152-
read_name = line.split()[0][1:-2]
151+
raw_read_name = line.split()[0]
152+
if re.search('/1',raw_read_name) or re.search('/2',raw_read_name):
153+
read_name = raw_read_name[1:-2]
153154
else:
154-
read_name = line.split()[0][1:]
155+
read_name = raw_read_name[1:]
155156
if read_name in assign_dict.keys() and assign_dict[read_name] == gene:
156157
flag = True
157158
num = 1
@@ -187,15 +188,24 @@ def main():
187188
t1 = time.time()
188189
print ("read bam cost %s"%(t1 - t0))
189190

191+
count_read_for_each_gene = {}
190192
# assign genes for each read
191193
for read_name in read_dict:
192194
assigned_locus = read_dict[read_name].assign()
193195
if assigned_locus == 'REMOVE':
194196
continue
195197
assign_dict[read_name] = assigned_locus
196-
198+
if assigned_locus not in count_read_for_each_gene:
199+
count_read_for_each_gene[assigned_locus] = 0
200+
count_read_for_each_gene[assigned_locus] += 1
201+
197202
# generate gene-specific fastq
198203
for gene in ['A', 'B', 'C', 'DPA1', 'DPB1', 'DQA1', 'DQB1', 'DRB1']:
204+
if gene in count_read_for_each_gene:
205+
read_num = count_read_for_each_gene[gene]
206+
else:
207+
read_num = 0
208+
print (f"read no. for {gene} is {read_num}.")
199209
assign_fastq(options.fq1, gene, 1, assign_dict)
200210
assign_fastq(options.fq2, gene, 2, assign_dict)
201211
t2 = time.time()

0 commit comments

Comments
 (0)