1+ from sequence import *
2+ from sym import *
3+ from sstruct import *
4+
5+ atp = 0 # number of true positives (correctly identified calls)
6+ atn = 0 # number of true negatives (correctly missed no-calls)
7+ afp = 0 # number of false positives (incorrectly identified no-calls)
8+ afn = 0 # number of false negatives (incorrectly missed calls)
9+
10+ btp = 0
11+ btn = 0
12+ bfp = 0
13+ bfn = 0
14+
15+ ctp = 0
16+ ctn = 0
17+ cfp = 0
18+ cfn = 0
19+ tp = 0
20+ tn = 0
21+ fp = 0
22+ fn = 0
23+
24+ # read both protein and structure sequences into lists
25+ prot2 = readFastaFile ("prot2.fa" , Protein_Alphabet )
26+ sstr3 = readFastaFile ("sstr3.fa" , DSSP3_Alphabet )
27+
28+ for index in range (5 ):
29+ protein = prot2 [index ]
30+ structure = sstr3 [index ]
31+ # print("protein: ", protein)
32+ # print("structure: ", structure)
33+
34+ # print("Steps 1 to 4")
35+ # Step 2: Alpha-helix
36+ alpha = getScores (protein , 0 ) # values from column 0
37+ beta = getScores (protein , 1 ) # values from column 1
38+ calls_a1 = markCountAbove (alpha , width = 6 , call_cnt = 4 )
39+ # print(makesstr(calls_a1, 'H'))
40+
41+ calls_a2 = extendDownstream (alpha , calls_a1 , width = 4 )
42+ calls_a3 = extendUpstream (alpha , calls_a2 , width = 4 )
43+
44+ calls_b1 = markCountAbove (beta , width = 5 , call_cnt = 3 )
45+ # print(makesstr(calls_b1, 'E'))
46+ calls_b2 = extendDownstream (beta , calls_b1 , width = 4 )
47+ calls_b3 = extendUpstream (beta , calls_b2 , width = 4 )
48+
49+ avg_a = calcRegionAverage (alpha , calls_a3 )
50+ avg_b = calcRegionAverage (beta , calls_b3 )
51+
52+ diff_a = [avg_a [i ] - avg_b [i ] for i in range (len (avg_a ))]
53+ diff_b = [avg_b [i ] - avg_a [i ] for i in range (len (avg_a ))]
54+
55+ # print("Step 5")
56+ calls_a4 = checkSupport (calls_a3 , diff_a )
57+ calls_b4 = checkSupport (calls_b3 , diff_b )
58+ # print(makesstr(calls_a4, 'H'))
59+ # print(makesstr(calls_b4, 'E'))
60+
61+ # print("Final prediction")
62+ # Combine calls and replace remaining '-' symbols to C (coil)
63+ prediction = predicted_sequence = '' .join (
64+ ['H' if alpha else ('E' if beta else 'C' ) for (alpha , beta ) in zip (calls_a4 , calls_b4 )])
65+ # print(prediction)
66+
67+ position = 0
68+ for element in structure .sequence :
69+ if element == "H" and calls_a4 [position ]:
70+ atp += 1 # matched alpha helix
71+ if element != "H" and not (calls_a4 [position ]):
72+ atn += 1 # matched beta sheet
73+ if element != "H" and calls_a4 [position ]:
74+ afp += 1 # matched beta sheet
75+ if element == "H" and not (calls_a4 [position ]):
76+ afn += 1 # matched beta sheet
77+ position += 1
78+
79+ position = 0
80+ for element in structure .sequence :
81+ if element == "E" and calls_b4 [position ]:
82+ btp += 1 # matched alpha helix
83+ if element != "E" and not (calls_b4 [position ]):
84+ btn += 1 # matched beta sheet
85+ if element != "E" and calls_b4 [position ]:
86+ bfp += 1 # matched beta sheet
87+ if element == "E" and not (calls_b4 [position ]):
88+ bfn += 1 # matched beta sheet
89+ position += 1
90+
91+ position = 0
92+ for element in structure .sequence :
93+ if element == "C" and not (calls_a4 [position ]) and not (calls_b4 [position ]):
94+ ctp += 1 # matched alpha helix
95+ if element != "C" and ((calls_a4 [position ]) or (calls_b4 [position ])):
96+ ctn += 1 # matched beta sheet
97+ if element != "C" and not (calls_a4 [position ]) and not (calls_b4 [position ]):
98+ cfp += 1 # matched beta sheet
99+ if element == "C" and ((calls_a4 [position ]) or (calls_b4 [position ])):
100+ cfn += 1 # matched beta sheet
101+ position += 1
102+
103+ tp = atp + btp + ctp
104+ tn = atn + btn + ctn
105+ fp = afp + bfp + cfp
106+ fn = afn + bfn + cfn
107+
108+ ####### Accuracy calculations #######
109+ print ("alpha-helices True Positive = %d" % atp )
110+ print ("alpha-helices True Negative = %d" % atn )
111+ print ("alpha-helices False Positive = %d" % afp )
112+ print ("alpha-helices False Negative = %d" % afn )
113+ print ("alpha-helix Accuracy %.2f%%" % ((float ((atp + atn ) / (atp + atn + afp + afn )) * 100 )))
114+
115+ print ("beta-strands True Positive = %d" % btp )
116+ print ("beta-strands True Negative = %d" % btn )
117+ print ("beta-strands False Positive = %d" % bfp )
118+ print ("beta-strands False Negative = %d" % bfn )
119+ print ("beta-strands Accuracy %.2f%%" % ((float ((btp + btn ) / (btp + btn + bfp + bfn )) * 100 )))
120+
121+ print ("coil True Positive = %d" % ctp )
122+ print ("coil True Negative = %d" % ctn )
123+ print ("coil False Positive = %d" % cfp )
124+ print ("coil False Negative = %d" % cfn )
125+ print ("coil Accuracy %.2f%%" % ((float ((ctp + ctn ) / (ctp + ctn + cfp + cfn )) * 100 )))
126+
127+ print ("True Positive = %d" % tp )
128+ print ("True Negative = %d" % tn )
129+ print ("False Positive = %d" % fp )
130+ print ("False Negative = %d" % fn )
131+ print ("Accuracy %.2f%%" % ((float ((tp + tn ) / (tp + tn + fp + fn )) * 100 )))
0 commit comments