Skip to content

Commit cbaae62

Browse files
committed
update
1 parent b4f459b commit cbaae62

File tree

11 files changed

+1828
-0
lines changed

11 files changed

+1828
-0
lines changed

SCIE2100_Practical6/.ipynb_checkpoints/try_prac6-checkpoint.ipynb

Lines changed: 776 additions & 0 deletions
Large diffs are not rendered by default.
24.3 KB
Binary file not shown.
35.1 KB
Binary file not shown.
11.5 KB
Binary file not shown.
10.9 KB
Binary file not shown.
14.8 KB
Binary file not shown.
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
from sequence import *
2+
from sym import *
3+
from sstruct import *
4+
5+
atp = 0 # number of true positives (correctly identified calls)
6+
atn = 0 # number of true negatives (correctly missed no-calls)
7+
afp = 0 # number of false positives (incorrectly identified no-calls)
8+
afn = 0 # number of false negatives (incorrectly missed calls)
9+
10+
btp = 0
11+
btn = 0
12+
bfp = 0
13+
bfn = 0
14+
15+
ctp = 0
16+
ctn = 0
17+
cfp = 0
18+
cfn = 0
19+
tp = 0
20+
tn = 0
21+
fp = 0
22+
fn = 0
23+
24+
# read both protein and structure sequences into lists
25+
prot2 = readFastaFile("prot2.fa", Protein_Alphabet)
26+
sstr3 = readFastaFile("sstr3.fa", DSSP3_Alphabet)
27+
28+
for index in range(5):
29+
protein = prot2[index]
30+
structure = sstr3[index]
31+
# print("protein: ", protein)
32+
# print("structure: ", structure)
33+
34+
# print("Steps 1 to 4")
35+
# Step 2: Alpha-helix
36+
alpha = getScores(protein, 0) # values from column 0
37+
beta = getScores(protein, 1) # values from column 1
38+
calls_a1 = markCountAbove(alpha, width=6, call_cnt=4)
39+
# print(makesstr(calls_a1, 'H'))
40+
41+
calls_a2 = extendDownstream(alpha, calls_a1, width=4)
42+
calls_a3 = extendUpstream(alpha, calls_a2, width=4)
43+
44+
calls_b1 = markCountAbove(beta, width=5, call_cnt=3)
45+
# print(makesstr(calls_b1, 'E'))
46+
calls_b2 = extendDownstream(beta, calls_b1, width=4)
47+
calls_b3 = extendUpstream(beta, calls_b2, width=4)
48+
49+
avg_a = calcRegionAverage(alpha, calls_a3)
50+
avg_b = calcRegionAverage(beta, calls_b3)
51+
52+
diff_a = [avg_a[i] - avg_b[i] for i in range(len(avg_a))]
53+
diff_b = [avg_b[i] - avg_a[i] for i in range(len(avg_a))]
54+
55+
# print("Step 5")
56+
calls_a4 = checkSupport(calls_a3, diff_a)
57+
calls_b4 = checkSupport(calls_b3, diff_b)
58+
# print(makesstr(calls_a4, 'H'))
59+
# print(makesstr(calls_b4, 'E'))
60+
61+
# print("Final prediction")
62+
# Combine calls and replace remaining '-' symbols to C (coil)
63+
prediction = predicted_sequence = ''.join(
64+
['H' if alpha else ('E' if beta else 'C') for (alpha, beta) in zip(calls_a4, calls_b4)])
65+
# print(prediction)
66+
67+
position = 0
68+
for element in structure.sequence:
69+
if element == "H" and calls_a4[position]:
70+
atp += 1 # matched alpha helix
71+
if element != "H" and not (calls_a4[position]):
72+
atn += 1 # matched beta sheet
73+
if element != "H" and calls_a4[position]:
74+
afp += 1 # matched beta sheet
75+
if element == "H" and not (calls_a4[position]):
76+
afn += 1 # matched beta sheet
77+
position += 1
78+
79+
position = 0
80+
for element in structure.sequence:
81+
if element == "E" and calls_b4[position]:
82+
btp += 1 # matched alpha helix
83+
if element != "E" and not (calls_b4[position]):
84+
btn += 1 # matched beta sheet
85+
if element != "E" and calls_b4[position]:
86+
bfp += 1 # matched beta sheet
87+
if element == "E" and not (calls_b4[position]):
88+
bfn += 1 # matched beta sheet
89+
position += 1
90+
91+
position = 0
92+
for element in structure.sequence:
93+
if element == "C" and not (calls_a4[position]) and not (calls_b4[position]):
94+
ctp += 1 # matched alpha helix
95+
if element != "C" and ((calls_a4[position]) or (calls_b4[position])):
96+
ctn += 1 # matched beta sheet
97+
if element != "C" and not (calls_a4[position]) and not (calls_b4[position]):
98+
cfp += 1 # matched beta sheet
99+
if element == "C" and ((calls_a4[position]) or (calls_b4[position])):
100+
cfn += 1 # matched beta sheet
101+
position += 1
102+
103+
tp = atp + btp + ctp
104+
tn = atn + btn + ctn
105+
fp = afp + bfp + cfp
106+
fn = afn + bfn + cfn
107+
108+
####### Accuracy calculations #######
109+
print("alpha-helices True Positive = %d" % atp)
110+
print("alpha-helices True Negative = %d" % atn)
111+
print("alpha-helices False Positive = %d" % afp)
112+
print("alpha-helices False Negative = %d" % afn)
113+
print("alpha-helix Accuracy %.2f%%" % ((float((atp + atn) / (atp + atn + afp + afn)) * 100)))
114+
115+
print("beta-strands True Positive = %d" % btp)
116+
print("beta-strands True Negative = %d" % btn)
117+
print("beta-strands False Positive = %d" % bfp)
118+
print("beta-strands False Negative = %d" % bfn)
119+
print("beta-strands Accuracy %.2f%%" % ((float((btp + btn) / (btp + btn + bfp + bfn)) * 100)))
120+
121+
print("coil True Positive = %d" % ctp)
122+
print("coil True Negative = %d" % ctn)
123+
print("coil False Positive = %d" % cfp)
124+
print("coil False Negative = %d" % cfn)
125+
print("coil Accuracy %.2f%%" % ((float((ctp + ctn) / (ctp + ctn + cfp + cfn)) * 100)))
126+
127+
print("True Positive = %d" % tp)
128+
print("True Negative = %d" % tn)
129+
print("False Positive = %d" % fp)
130+
print("False Negative = %d" % fn)
131+
print("Accuracy %.2f%%" % ((float((tp + tn) / (tp + tn + fp + fn)) * 100)))
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
def getScoresWithForLoop(seq, index=0):
2+
""" Same functionality as getScores, but using a for loop instead of a list comprehension.
3+
"""
4+
scoreList = []
5+
for s in seq:
6+
scoreList.append(cf_dict[s.upper()][index])
7+
8+
return scoreList
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
from sequence import *
2+
from sym import *
3+
from sstruct import *
4+
5+
# read both protein and structure sequences into lists
6+
prot2 = readFastaFile("prot2.fa", Protein_Alphabet)
7+
sstr3 = readFastaFile("sstr3.fa", DSSP3_Alphabet)
8+
9+
# store protein and structure sequences in dictionary for easy retrieval
10+
protein_map = {}
11+
structure_map = {}
12+
13+
for protein in prot2:
14+
protein_map.update({protein.name: protein})
15+
16+
for structure in sstr3:
17+
structure_map.update({structure.name: structure})
18+
19+
protein = protein_map.get("1EVH")
20+
structure = structure_map.get("1EVH")
21+
print("protein: ", protein)
22+
print("structure: ", structure)
23+
24+
print("Steps 1 to 4")
25+
# Step 2: Alpha-helix
26+
alpha = getScores(protein, 0) # values from column 0
27+
beta = getScores(protein, 1) # values from column 1
28+
calls_a1 = markCountAbove(alpha, width=6, call_cnt=4)
29+
print(makesstr(calls_a1, 'H'))
30+
31+
calls_a2 = extendDownstream(alpha, calls_a1, width=4)
32+
calls_a3 = extendUpstream(alpha, calls_a2, width=4)
33+
34+
calls_b1 = markCountAbove(beta, width=5, call_cnt=3)
35+
print(makesstr(calls_b1, 'E'))
36+
calls_b2 = extendDownstream(beta, calls_b1, width=4)
37+
calls_b3 = extendUpstream(beta, calls_b2, width=4)
38+
39+
avg_a = calcRegionAverage(alpha, calls_a3)
40+
avg_b = calcRegionAverage(beta, calls_b3)
41+
42+
diff_a = [avg_a[i] - avg_b[i] for i in range(len(avg_a))]
43+
diff_b = [avg_b[i] - avg_a[i] for i in range(len(avg_a))]
44+
45+
print("Step 5")
46+
calls_a4 = checkSupport(calls_a3, diff_a)
47+
calls_b4 = checkSupport(calls_b3, diff_b)
48+
print(makesstr(calls_a4, 'H'))
49+
print(makesstr(calls_b4, 'E'))
50+
51+
print("Final prediction")
52+
# Combine calls and replace remaining '-' symbols to C (coil)
53+
prediction = predicted_sequence = ''.join(['H' if alpha else ('E' if beta else 'C') for (alpha, beta) in zip(calls_a4, calls_b4)])
54+
print(prediction)
55+
56+
position = 0
57+
tp = 0 # number of true positives (correctly identified calls)
58+
tn = 0 # number of true negatives (correctly missed no-calls)
59+
fp = 0 # number of false positives (incorrectly identified no-calls)
60+
fn = 0 # number of false negatives (incorrectly missed calls)
61+
for element in structure.sequence:
62+
if element == "H" and calls_a4[position]:
63+
tp += 1 # matched alpha helix
64+
if element != "H" and not(calls_a4[position]):
65+
tn += 1 # matched beta sheet
66+
if element != "H" and calls_a4[position]:
67+
fp += 1 # matched beta sheet
68+
if element == "H" and not(calls_a4[position]):
69+
fn += 1 # matched beta sheet
70+
position += 1
71+
72+
print("alpha-helix Accuracy %.2f%%" % ((float((tp+tn) / (tp+tn+fp+fn)) * 100)))
73+
74+
position = 0
75+
tp = 0 # number of true positives (correctly identified calls)
76+
tn = 0 # number of true negatives (correctly missed no-calls)
77+
fp = 0 # number of false positives (incorrectly identified no-calls)
78+
fn = 0 # number of false negatives (incorrectly missed calls)
79+
for element in structure.sequence:
80+
if element == "E" and calls_b4[position]:
81+
tp += 1 # matched alpha helix
82+
if element != "E" and not(calls_b4[position]):
83+
tn += 1 # matched beta sheet
84+
if element != "E" and calls_b4[position]:
85+
fp += 1 # matched beta sheet
86+
if element == "E" and not(calls_b4[position]):
87+
fn += 1 # matched beta sheet
88+
position += 1
89+
90+
print("beta-strand Accuracy %.2f%%" % ((float((tp+tn) / (tp+tn+fp+fn)) * 100)))

0 commit comments

Comments
 (0)