FIX: Accept any valid delimiters/EOF markers in TCK files #720

Merged · 14 commits · Feb 4, 2019
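
The TCK reader used to locate streamline and end-of-file delimiters by comparing raw bytes against a single canonical encoding of the NaN/Inf markers (cls.FIBER_DELIMITER.astype(dtype).tostring()). IEEE-754 permits many bit patterns for NaN, and files written by other tools can use a different one (the new test fixture, matlab_nan.tck, points to MATLAB-written data), so the byte comparison could fail to find the delimiters. This PR instead decodes each buffer into float triples and detects delimiters with np.isnan / np.isinf, which accept any valid encoding; it also fixes an unrelated string identity comparison in cifti2.py. The following sketch is not from the PR, only an illustration of the difference between the two checks:

import numpy as np

# Two different, equally valid IEEE-754 encodings of a float32 NaN.
nan_default = np.float32(np.nan)                                # NumPy's usual quiet NaN
nan_other = np.frombuffer(b'\xff\xff\xff\xff', dtype='<f4')[0]  # another NaN bit pattern

# Byte-level comparison (the old approach) only matches one encoding...
print(nan_default.tobytes() == nan_other.tobytes())   # False
# ...while a value-level check (the new approach) accepts both.
print(np.isnan(nan_default), np.isnan(nan_other))     # True True
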
2 changes: 1 addition & 1 deletion nibabel/cifti2/cifti2.py
@@ -287,7 +287,7 @@ def rgba(self):
return (self.red, self.green, self.blue, self.alpha)

def _to_xml_element(self):
- if self.label is '':
+ if self.label == '':
raise Cifti2HeaderError('Label needs a name')
try:
v = int(self.key)
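
The one-line cifti2 change replaces an identity test with an equality test against the empty string: 'is' compares object identity, and whether two equal strings are the same object is an interning detail of the interpreter (CPython 3.8+ warns about 'is' with a literal). A minimal sketch of the distinction, not taken from the PR:

label = ''.join([])   # an empty string built at runtime
print(label == '')    # True: value comparison, the correct emptiness check
print(label is '')    # depends on string interning; not a reliable check
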
67 changes: 31 additions & 36 deletions nibabel/streamlines/tck.py
@@ -390,18 +390,14 @@ def _read(cls, fileobj, header, buffer_size=4):
buffer_size = int(buffer_size * MEGABYTE)
buffer_size += coordinate_size - (buffer_size % coordinate_size)

- # Markers for streamline end and file end
- fiber_marker = cls.FIBER_DELIMITER.astype(dtype).tostring()
- eof_marker = cls.EOF_DELIMITER.astype(dtype).tostring()
-
with Opener(fileobj) as f:
start_position = f.tell()

# Set the file position at the beginning of the data.
f.seek(header["_offset_data"], os.SEEK_SET)

eof = False
- buffs = []
+ leftover = np.empty((0, 3), dtype='<f4')
n_streams = 0

while not eof:
@@ -411,37 +407,36 @@ def _read(cls, fileobj, header, buffer_size=4):
if eof:
buff = buff[:n_read]

- buffs.append(buff)
-
- # Make sure we've read enough to find a streamline delimiter.
- if fiber_marker not in buff:
- # If we've read the whole file, then fail.
- if eof:
- # Could have minimal buffering, and have read only the
- # EOF delimiter
- buffs = [bytearray().join(buffs)]
- if not buffs[0] == eof_marker:
- raise DataError(
- "Cannot find a streamline delimiter. This file"
- " might be corrupted.")
- else:
- # Otherwise read a bit more.
- continue
-
- all_parts = bytearray().join(buffs).split(fiber_marker)
- point_parts, buffs = all_parts[:-1], all_parts[-1:]
- point_parts = [p for p in point_parts if p != b'']
-
- for point_part in point_parts:
- # Read floats.
- pts = np.frombuffer(point_part, dtype=dtype)
- # Convert data to little-endian if needed.
- yield pts.astype('<f4', copy=False).reshape([-1, 3])
-
- n_streams += len(point_parts)
-
- if not buffs[-1] == eof_marker:
- raise DataError("Expecting end-of-file marker 'inf inf inf'")
+ raw_values = np.frombuffer(buff, dtype=dtype)
+
+ # Convert raw_values into a list of little-endian triples (for x,y,z coord)
+ coords = raw_values.astype('<f4', copy=False).reshape((-1, 3))
+
+ # Find stream delimiter locations (all NaNs)
+ delims = np.where(np.isnan(coords).all(axis=1))[0]
+
+ # Recover leftovers, which can't have delimiters in them
+ if leftover.size:
+ delims += leftover.shape[0]
+ coords = np.vstack((leftover, coords))
+
+ begin = 0
+ for delim in delims:
+ pts = coords[begin:delim]
+ if pts.size:
+ yield pts
+ n_streams += 1
+ begin = delim + 1
+
+ # The rest becomes the new leftover.
+ leftover = coords[begin:]
+
+ if not (leftover.shape == (1, 3) and np.isinf(leftover).all()):
+ if n_streams == 0:
+ msg = "Cannot find a streamline delimiter. This file might be corrupted."
+ else:
+ msg = "Expecting end-of-file marker 'inf inf inf'"
+ raise DataError(msg)

# In case the 'count' field was not provided.
header[Field.NB_STREAMLINES] = n_streams
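
For reference, a minimal standalone sketch (not the PR's code) of the per-buffer delimiter logic the rewritten _read applies: decode the bytes as little-endian (x, y, z) triples, treat all-NaN rows as streamline delimiters, carry anything after the last delimiter into the next iteration as leftover, and expect a single all-Inf row as the end-of-file marker.

import numpy as np

# Toy decoded buffer: two streamlines separated by all-NaN rows, ending with all-Inf.
coords = np.array([[0, 0, 0],
                   [1, 1, 1],
                   [np.nan] * 3,   # streamline delimiter
                   [2, 2, 2],
                   [np.nan] * 3,   # streamline delimiter
                   [np.inf] * 3],  # end-of-file marker
                  dtype='<f4')

delims = np.where(np.isnan(coords).all(axis=1))[0]   # indices of all-NaN rows
begin, streamlines = 0, []
for delim in delims:
    pts = coords[begin:delim]
    if pts.size:                  # skip empty segments between back-to-back delimiters
        streamlines.append(pts)
    begin = delim + 1

leftover = coords[begin:]         # whatever follows the last delimiter
assert leftover.shape == (1, 3) and np.isinf(leftover).all()
print(len(streamlines))           # 2
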
8 changes: 8 additions & 0 deletions nibabel/streamlines/tests/test_tck.py
@@ -32,6 +32,7 @@ def setup():
"simple_big_endian.tck")
# standard.tck contains only streamlines
DATA['standard_tck_fname'] = pjoin(data_path, "standard.tck")
+ DATA['matlab_nan_tck_fname'] = pjoin(data_path, "matlab_nan.tck")

DATA['streamlines'] = [np.arange(1 * 3, dtype="f4").reshape((1, 3)),
np.arange(2 * 3, dtype="f4").reshape((2, 3)),
@@ -64,6 +65,13 @@ def test_load_simple_file(self):
tck = TckFile(tractogram, header=hdr)
assert_tractogram_equal(tck.tractogram, DATA['simple_tractogram'])

+ def test_load_matlab_nan_file(self):
+ for lazy_load in [False, True]:
+ tck = TckFile.load(DATA['matlab_nan_tck_fname'], lazy_load=lazy_load)
+ streamlines = list(tck.tractogram.streamlines)
+ assert_equal(len(streamlines), 1)
+ assert_equal(streamlines[0].shape, (108, 3))
+
def test_writeable_data(self):
data = DATA['simple_tractogram']
for key in ('simple_tck_fname', 'simple_tck_big_endian_fname'):
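
A hypothetical round-trip sketch (not part of this PR's test suite) showing how the loader exercised by these tests is typically driven; 'example.tck' is an illustrative path:

import numpy as np
from nibabel.streamlines import Tractogram
from nibabel.streamlines.tck import TckFile

# Save a small tractogram to TCK and read it back.
streamlines = [np.random.rand(10, 3).astype('f4'),
               np.random.rand(5, 3).astype('f4')]
tck = TckFile(Tractogram(streamlines, affine_to_rasmm=np.eye(4)))
tck.save('example.tck')
reloaded = TckFile.load('example.tck', lazy_load=False)
print([s.shape for s in reloaded.tractogram.streamlines])   # [(10, 3), (5, 3)]
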
Binary file added nibabel/tests/data/matlab_nan.tck