From a8410554bc4802b7d0db26a8e5c8ea006fcac7c0 Mon Sep 17 00:00:00 2001 From: Soichi Hayashi Date: Sun, 27 Jan 2019 16:40:00 -0500 Subject: [PATCH 01/12] fixed a bug where tck _read() is expecting specific binary expression for fiber_delimiter and eof_delimiter. --- nibabel/streamlines/tck.py | 75 +++++++++++++++----------------------- 1 file changed, 30 insertions(+), 45 deletions(-) diff --git a/nibabel/streamlines/tck.py b/nibabel/streamlines/tck.py index 31f0be0ab5..95db07d755 100644 --- a/nibabel/streamlines/tck.py +++ b/nibabel/streamlines/tck.py @@ -385,14 +385,9 @@ def _read(cls, fileobj, header, buffer_size=4): Streamline points """ dtype = header["_dtype"] - coordinate_size = 3 * dtype.itemsize - # Make buffer_size an integer and a multiple of coordinate_size. - buffer_size = int(buffer_size * MEGABYTE) - buffer_size += coordinate_size - (buffer_size % coordinate_size) - # Markers for streamline end and file end - fiber_marker = cls.FIBER_DELIMITER.astype(dtype).tostring() - eof_marker = cls.EOF_DELIMITER.astype(dtype).tostring() + #align batch_size to be multiple of 3 within the specified buffer size + batch_size = int(buffer_size * MEGABYTE / dtype.itemsize / 3) * 3 with Opener(fileobj) as f: start_position = f.tell() @@ -401,46 +396,36 @@ def _read(cls, fileobj, header, buffer_size=4): f.seek(header["_offset_data"], os.SEEK_SET) eof = False - buffs = [] n_streams = 0 - + leftover = np.empty([0,3]) while not eof: - buff = bytearray(buffer_size) - n_read = f.readinto(buff) - eof = n_read != buffer_size - if eof: - buff = buff[:n_read] - - buffs.append(buff) - - # Make sure we've read enough to find a streamline delimiter. - if fiber_marker not in buff: - # If we've read the whole file, then fail. - if eof: - # Could have minimal buffering, and have read only the - # EOF delimiter - buffs = [bytearray().join(buffs)] - if not buffs[0] == eof_marker: - raise DataError( - "Cannot find a streamline delimiter. This file" - " might be corrupted.") - else: - # Otherwise read a bit more. - continue - - all_parts = bytearray().join(buffs).split(fiber_marker) - point_parts, buffs = all_parts[:-1], all_parts[-1:] - point_parts = [p for p in point_parts if p != b''] - - for point_part in point_parts: - # Read floats. - pts = np.frombuffer(point_part, dtype=dtype) - # Convert data to little-endian if needed. - yield pts.astype(' Date: Mon, 28 Jan 2019 16:56:49 -0500 Subject: [PATCH 02/12] optimized performance (now it's 4% faster than the original code) --- nibabel/streamlines/tck.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/nibabel/streamlines/tck.py b/nibabel/streamlines/tck.py index 95db07d755..6e10e69f5e 100644 --- a/nibabel/streamlines/tck.py +++ b/nibabel/streamlines/tck.py @@ -397,7 +397,7 @@ def _read(cls, fileobj, header, buffer_size=4): eof = False n_streams = 0 - leftover = np.empty([0,3]) + leftover = np.empty((0,3), dtype=' Date: Tue, 29 Jan 2019 08:47:14 -0500 Subject: [PATCH 03/12] added matlab_nan.tck, a test tck file containing NaN and Inf in a different binary format from the current numpy.nan and numpy.inf --- nibabel/tests/data/matlab_nan.tck | Bin 0 -> 1657 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 nibabel/tests/data/matlab_nan.tck diff --git a/nibabel/tests/data/matlab_nan.tck b/nibabel/tests/data/matlab_nan.tck new file mode 100644 index 0000000000000000000000000000000000000000..6afc9af60c5fe38a6905ba21bc63b47d721de811 GIT binary patch literal 1657 zcmY+B3sh9)7RLt=qQixJrA#udOBeA;dhO>PLe#26z$95YZM?wGY&?X~t=``h1t|MvdA zZ5L(H;#SI{-e8qDqh8j_#ST-Va)C|I%W<)bQaOX*;^jo;oS4}vm5Q_Q7Fj2oMU!L} zY{o>TDms?4>)kq|Me>PaeTsW0-euS6W!YqR$Wo#*Cid6T-!Ja>@`-&}&Tf*;f{&B3 zOt*AdW-4TGb_=gFxWpHIf?wnN9pSAH-mD8SfRTxIaB7hI8>2qs51n zEdsB%-4lp{B~i)Af~>dce*2Et*$FBx-(vHDqm^-S@m%WS6i(r@EC|HTDi8Jx>{p-M zAAq&rjU^M(ZmO4Q{PDo?XfmaANImIi1vW0vAO-J?sGF)rQI2dQNjZM7PB((6VgoUB z20*}yyGXdPofvgN;P1YTcHOBb-i~1S?#n^6FXRY0k{bfmpWZ?*YFfyi3!xBhyotWw z`wiKgJ{FoExPh*zJ%n!>4~G7J6dyE1w2>2`L)(XDzBGpBZJrD>H}#@6E|l)<{VTlx zVK187IFU|In+EIp`_O7rIGtD*4i}E}qq=!f^rah0xaR3Y{CpKX86N?jmAz=TK7lTF zMMCD?9u!okq1zjxz~0b;oZv1*HT|Z?$EE)~MkzZ}6`==yBS-C%UTzQ-3h5{_9xs4LX?4s#E zYoPnp0c41;r<=Y`fm6;NbhqRKno++14(5B1`SgCe!TLNj>ATU>v4?3&WI9Yu@F2yz zO*HANg}^@f0cjtnG)2sS%J=%vtIa2mfKN80S=kvG$8=VDL-`E~^^H3|$7WI>%z` zn-8;1-9|W9Gzl-*J(pa!5DqP!$C7UgaOzkq9{YMS z`}Cj#($2Tx_67}8uXMuu&wP(PCsWykn@;#~cmOx{J=?vqP69c)4LRas8Cb#&t>1 zxM~{djL@-aSi50&h0Zx08Jkw}qHE9y$7pq9kw^o)B zEP=IYHfaqM*o7t$N*+ieuRkrak~|Tv7O2Tv*)GO zWs(zSjJ!Z{&%MmN=N&M&b~#x!yq2xCI^dz_R+84&%GfKv2yn4kPckFcvo%`>gM_Ebjq6(IJoQL5l0*Sw}kqsMpxV}y#S?)?^e~^czxh~=f+rq*Q*kPom zh@7pjV%xO7U1*C5O?r)W4%p!I#uDN@yOo`nY@nQ8N@@+;*uE|+w2v<(VYjN;4{9r1 vTv0+Q)^BIeZYhMSPB&>Ew}aKSSfFj7i1hw*C)?*SgF-<^|M0cG_`d%i^->W~ literal 0 HcmV?d00001 From 8ed4acadbeb92ecfffdf0ec80f05abb2d4753049 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Wed, 30 Jan 2019 22:41:18 -0500 Subject: [PATCH 04/12] RF: Reduce concatenations further, moderate cleanups --- nibabel/streamlines/tck.py | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/nibabel/streamlines/tck.py b/nibabel/streamlines/tck.py index 6e10e69f5e..847c852d31 100644 --- a/nibabel/streamlines/tck.py +++ b/nibabel/streamlines/tck.py @@ -386,7 +386,7 @@ def _read(cls, fileobj, header, buffer_size=4): """ dtype = header["_dtype"] - #align batch_size to be multiple of 3 within the specified buffer size + # Align batch_size to be multiple of 3 within the specified buffer size batch_size = int(buffer_size * MEGABYTE / dtype.itemsize / 3) * 3 with Opener(fileobj) as f: @@ -397,7 +397,7 @@ def _read(cls, fileobj, header, buffer_size=4): eof = False n_streams = 0 - leftover = np.empty((0,3), dtype=' Date: Wed, 30 Jan 2019 23:06:12 -0500 Subject: [PATCH 05/12] FIX: Return to bytearray/frombuffer approach --- nibabel/streamlines/tck.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/nibabel/streamlines/tck.py b/nibabel/streamlines/tck.py index 847c852d31..53fb26ee13 100644 --- a/nibabel/streamlines/tck.py +++ b/nibabel/streamlines/tck.py @@ -386,8 +386,10 @@ def _read(cls, fileobj, header, buffer_size=4): """ dtype = header["_dtype"] - # Align batch_size to be multiple of 3 within the specified buffer size - batch_size = int(buffer_size * MEGABYTE / dtype.itemsize / 3) * 3 + coordinate_size = 3 * dtype.itemsize + # Make buffer_size an integer and a multiple of coordinate_size. + buffer_size = int(buffer_size * MEGABYTE) + buffer_size += coordinate_size - (buffer_size % coordinate_size) with Opener(fileobj) as f: start_position = f.tell() @@ -399,11 +401,14 @@ def _read(cls, fileobj, header, buffer_size=4): n_streams = 0 leftover = np.empty((0, 3), dtype=' Date: Wed, 30 Jan 2019 23:06:46 -0500 Subject: [PATCH 06/12] FIX: Check final delimiter is ONLY infs --- nibabel/streamlines/tck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nibabel/streamlines/tck.py b/nibabel/streamlines/tck.py index 53fb26ee13..17e41989a8 100644 --- a/nibabel/streamlines/tck.py +++ b/nibabel/streamlines/tck.py @@ -431,7 +431,7 @@ def _read(cls, fileobj, header, buffer_size=4): # the rest gets appended to the leftover leftover = coords[begin:] - if not np.isinf(leftover).all(): + if not (leftover.shape == (1, 3) and np.isinf(leftover).all()): raise DataError("Expecting end-of-file marker 'inf inf inf'") # In case the 'count' field was not provided. From 0a75431a96c47e809a414e62eda0a3dce887a62b Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Wed, 30 Jan 2019 23:16:56 -0500 Subject: [PATCH 07/12] TEST: Simple load test for matlab_nan.tck --- nibabel/streamlines/tests/test_tck.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nibabel/streamlines/tests/test_tck.py b/nibabel/streamlines/tests/test_tck.py index f5752bbe8b..ad16b52754 100644 --- a/nibabel/streamlines/tests/test_tck.py +++ b/nibabel/streamlines/tests/test_tck.py @@ -32,6 +32,7 @@ def setup(): "simple_big_endian.tck") # standard.tck contains only streamlines DATA['standard_tck_fname'] = pjoin(data_path, "standard.tck") + DATA['matlab_nan_tck_fname'] = pjoin(data_path, "matlab_nan.tck") DATA['streamlines'] = [np.arange(1 * 3, dtype="f4").reshape((1, 3)), np.arange(2 * 3, dtype="f4").reshape((2, 3)), @@ -64,6 +65,13 @@ def test_load_simple_file(self): tck = TckFile(tractogram, header=hdr) assert_tractogram_equal(tck.tractogram, DATA['simple_tractogram']) + def test_load_matlab_nan_file(self): + for lazy_load in [False, True]: + tck = TckFile.load(DATA['matlab_nan_tck_fname'], lazy_load=lazy_load) + streamlines = list(tck.tractogram.streamlines) + assert_equal(len(streamlines), 1) + assert_equal(streamlines[0].shape, (108, 3)) + def test_writeable_data(self): data = DATA['simple_tractogram'] for key in ('simple_tck_fname', 'simple_tck_big_endian_fname'): From 676df4d11983ab7c0021f7f3d520e5acbac9c616 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Wed, 30 Jan 2019 23:29:17 -0500 Subject: [PATCH 08/12] STY: Reduce diff --- nibabel/streamlines/tck.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nibabel/streamlines/tck.py b/nibabel/streamlines/tck.py index 17e41989a8..cb9994a53c 100644 --- a/nibabel/streamlines/tck.py +++ b/nibabel/streamlines/tck.py @@ -385,7 +385,6 @@ def _read(cls, fileobj, header, buffer_size=4): Streamline points """ dtype = header["_dtype"] - coordinate_size = 3 * dtype.itemsize # Make buffer_size an integer and a multiple of coordinate_size. buffer_size = int(buffer_size * MEGABYTE) @@ -398,8 +397,9 @@ def _read(cls, fileobj, header, buffer_size=4): f.seek(header["_offset_data"], os.SEEK_SET) eof = False - n_streams = 0 leftover = np.empty((0, 3), dtype=' Date: Wed, 30 Jan 2019 23:43:34 -0500 Subject: [PATCH 09/12] RF: Restore missing streamline delimiter error --- nibabel/streamlines/tck.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nibabel/streamlines/tck.py b/nibabel/streamlines/tck.py index cb9994a53c..fcc39f1428 100644 --- a/nibabel/streamlines/tck.py +++ b/nibabel/streamlines/tck.py @@ -432,7 +432,11 @@ def _read(cls, fileobj, header, buffer_size=4): leftover = coords[begin:] if not (leftover.shape == (1, 3) and np.isinf(leftover).all()): - raise DataError("Expecting end-of-file marker 'inf inf inf'") + if n_streams == 0: + msg = "Cannot find a streamline delimiter. This file might be corrupted." + else: + msg = "Expecting end-of-file marker 'inf inf inf'" + raise DataError(msg) # In case the 'count' field was not provided. header[Field.NB_STREAMLINES] = n_streams From 196f13afe4b082b61e1c9cf7fedb8a9400518a01 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Wed, 30 Jan 2019 23:49:30 -0500 Subject: [PATCH 10/12] STY: Pacify flake8 --- nibabel/cifti2/cifti2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nibabel/cifti2/cifti2.py b/nibabel/cifti2/cifti2.py index 0ffe45a169..67dab1d0c2 100644 --- a/nibabel/cifti2/cifti2.py +++ b/nibabel/cifti2/cifti2.py @@ -287,7 +287,7 @@ def rgba(self): return (self.red, self.green, self.blue, self.alpha) def _to_xml_element(self): - if self.label is '': + if self.label == '': raise Cifti2HeaderError('Label needs a name') try: v = int(self.key) From bac19880f2e3e9fdcd54b454f1aa1455adb07bd3 Mon Sep 17 00:00:00 2001 From: Soichi Hayashi Date: Mon, 4 Feb 2019 00:05:22 -0500 Subject: [PATCH 11/12] reapplied @MarcCote's suggestion. --- nibabel/streamlines/tck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nibabel/streamlines/tck.py b/nibabel/streamlines/tck.py index fcc39f1428..2fe8ef4651 100644 --- a/nibabel/streamlines/tck.py +++ b/nibabel/streamlines/tck.py @@ -424,7 +424,7 @@ def _read(cls, fileobj, header, buffer_size=4): for delim in delims: pts = coords[begin:delim] if pts.size: - yield coords[begin:delim] + yield pts n_streams += 1 begin = delim + 1 From 486bbb2160c73ab616867bda366540753c4290fa Mon Sep 17 00:00:00 2001 From: Soichi Hayashi Date: Mon, 4 Feb 2019 08:19:03 -0500 Subject: [PATCH 12/12] applied another @MarcCote suggestion --- nibabel/streamlines/tck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nibabel/streamlines/tck.py b/nibabel/streamlines/tck.py index 2fe8ef4651..9b1888ebba 100644 --- a/nibabel/streamlines/tck.py +++ b/nibabel/streamlines/tck.py @@ -428,7 +428,7 @@ def _read(cls, fileobj, header, buffer_size=4): n_streams += 1 begin = delim + 1 - # The rest gets appended to the leftover + # The rest becomes the new leftover. leftover = coords[begin:] if not (leftover.shape == (1, 3) and np.isinf(leftover).all()):