Skip to content

Commit b453a29

Browse files
author
Lenna X. Peterson
committed
Change to match serial_id/model_id in PDB parser.
1 parent c0d60c6 commit b453a29

File tree

2 files changed

+32
-11
lines changed

2 files changed

+32
-11
lines changed

Bio/PDB/MMCIFParser.py

Lines changed: 28 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
"""mmCIF parser (partly implemented in C)."""
77

88
from string import ascii_letters
9+
import warnings
910

1011
import numpy
1112

@@ -24,7 +25,10 @@ def _build_structure(self, structure_id):
2425
mmcif_dict=self._mmcif_dict
2526
atom_id_list=mmcif_dict["_atom_site.label_atom_id"]
2627
residue_id_list=mmcif_dict["_atom_site.label_comp_id"]
27-
element_list = mmcif_dict["_atom_site.type_symbol"]
28+
try:
29+
element_list = mmcif_dict["_atom_site.type_symbol"]
30+
except KeyError:
31+
element_list = None
2832
seq_id_list=mmcif_dict["_atom_site.label_seq_id"]
2933
chain_id_list=mmcif_dict["_atom_site.label_asym_id"]
3034
x_list=map(float, mmcif_dict["_atom_site.Cartn_x"])
@@ -34,7 +38,14 @@ def _build_structure(self, structure_id):
3438
b_factor_list=mmcif_dict["_atom_site.B_iso_or_equiv"]
3539
occupancy_list=mmcif_dict["_atom_site.occupancy"]
3640
fieldname_list=mmcif_dict["_atom_site.group_PDB"]
37-
model_list = [int(n) for n in mmcif_dict["_atom_site.pdbx_PDB_model_num"]]
41+
try:
42+
serial_list = [int(n) for n in mmcif_dict["_atom_site.pdbx_PDB_model_num"]]
43+
except KeyError:
44+
# No model number column
45+
serial_list = None
46+
except ValueError:
47+
# Invalid model number (malformed file)
48+
warnings.warn("ERROR: Invalid model number", RuntimeError)
3849
try:
3950
aniso_u11=mmcif_dict["_atom_site.aniso_U[1][1]"]
4051
aniso_u12=mmcif_dict["_atom_site.aniso_U[1][2]"]
@@ -58,7 +69,10 @@ def _build_structure(self, structure_id):
5869
structure_builder=self._structure_builder
5970
structure_builder.init_structure(structure_id)
6071
structure_builder.init_seg(" ")
61-
current_model_id = -1
72+
# Historically, Biopython PDB parser uses model_id to mean array index
73+
# so serial_id means the Model ID specified in the file
74+
current_model_id = 0
75+
current_serial_id = 0
6276
for i in xrange(0, len(atom_id_list)):
6377
x=x_list[i]
6478
y=y_list[i]
@@ -77,9 +91,16 @@ def _build_structure(self, structure_id):
7791
hetatm_flag="H"
7892
else:
7993
hetatm_flag=" "
80-
model_id = model_list[i]
81-
if current_model_id != model_id:
82-
current_model_id = model_id
94+
if serial_list is not None:
95+
# model column exists; use it
96+
serial_id = serial_list[i]
97+
if current_serial_id != serial_id:
98+
# if serial changes, update it and start new model
99+
current_serial_id = serial_id
100+
structure_builder.init_model(current_model_id, current_serial_id)
101+
current_model_id += 1
102+
else:
103+
# no explicit model column; initialize single model
83104
structure_builder.init_model(current_model_id)
84105
if current_chain_id!=chainid:
85106
current_chain_id=chainid
@@ -94,7 +115,7 @@ def _build_structure(self, structure_id):
94115
structure_builder.init_residue(resname, hetatm_flag, int_resseq,
95116
icode)
96117
coord=numpy.array((x, y, z), 'f')
97-
element = element_list[i]
118+
element = element_list[i] if element_list else None
98119
structure_builder.init_atom(name, coord, tempfactor, occupancy, altloc,
99120
name, element=element)
100121
if aniso_flag==1:

Tests/test_MMCIF.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def test_parser(self):
4747
for ppbuild in [PPBuilder(), CaPPBuilder()]:
4848
#==========================================================
4949
#First try allowing non-standard amino acids,
50-
polypeptides = ppbuild.build_peptides(structure[1], False)
50+
polypeptides = ppbuild.build_peptides(structure[0], False)
5151
self.assertEqual(len(polypeptides), 1)
5252
pp = polypeptides[0]
5353
# Check the start and end positions
@@ -64,7 +64,7 @@ def test_parser(self):
6464
#Now try strict version with only standard amino acids
6565
#Should ignore MSE 151 at start, and then break the chain
6666
#at MSE 185, and MSE 214,215
67-
polypeptides = ppbuild.build_peptides(structure[1], True)
67+
polypeptides = ppbuild.build_peptides(structure[0], True)
6868
self.assertEqual(len(polypeptides), 3)
6969
#First fragment
7070
pp = polypeptides[0]
@@ -99,7 +99,7 @@ def testModels(self):
9999
for ppbuild in [PPBuilder(), CaPPBuilder()]:
100100
#==========================================================
101101
#First try allowing non-standard amino acids,
102-
polypeptides = ppbuild.build_peptides(structure[1], False)
102+
polypeptides = ppbuild.build_peptides(structure[0], False)
103103
self.assertEqual(len(polypeptides), 1)
104104
pp = polypeptides[0]
105105
# Check the start and end positions
@@ -114,7 +114,7 @@ def testModels(self):
114114
str(s))
115115
#==========================================================
116116
#Now try strict version with only standard amino acids
117-
polypeptides = ppbuild.build_peptides(structure[1], True)
117+
polypeptides = ppbuild.build_peptides(structure[0], True)
118118
self.assertEqual(len(polypeptides), 1)
119119
pp = polypeptides[0]
120120
# Check the start and end positions

0 commit comments

Comments
 (0)