Skip to content

Commit d1a69a0

Browse files
authored
api: iteration should stop after I/O error
This commit changes the behavior of the CSV reader (and its iterators) to stop when an I/O error from the underlying reader is encountered. When reading and parsing a record fails, correct behavior in most cases to keep on trying to read and parse the next record. That behavior remains the same after this commit. However when the Error is caused by failing to read from the underlying reader, the next read would almost always fail with the exact same error. In that scenario, reading from the underlying reader should not be attempted when trying to extract the next record, as this may lead to infinite loops. Instead, the reader will behave in the same way as if an end-of-file had been reached. Fixes BurntSushi#207
1 parent 7120114 commit d1a69a0

File tree

2 files changed

+63
-9
lines changed

2 files changed

+63
-9
lines changed

src/reader.rs

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,10 @@ impl ReaderBuilder {
686686
/// [`ReaderBuilder`](struct.ReaderBuilder.html).
687687
/// * When reading CSV data from a resource (like a file), it is possible for
688688
/// reading from the underlying resource to fail. This will return an error.
689+
/// For subsequent calls to the `Reader` after encountering a such error
690+
/// (unless `seek` is used), it will behave as if end of file had been
691+
/// reached, in order to avoid running into infinite loops when still
692+
/// attempting to read the next record when one has errored.
689693
/// * When reading CSV data into `String` or `&str` fields (e.g., via a
690694
/// [`StringRecord`](struct.StringRecord.html)), UTF-8 is strictly
691695
/// enforced. If CSV data is invalid UTF-8, then an error is returned. If
@@ -741,7 +745,34 @@ struct ReaderState {
741745
/// Whether the reader has been seeked or not.
742746
seeked: bool,
743747
/// Whether EOF of the underlying reader has been reached or not.
744-
eof: bool,
748+
///
749+
/// IO errors on the underlying reader will be considered as an EOF for
750+
/// subsequent read attempts, as it would be incorrect to keep on trying
751+
/// to read when the underlying reader has broken.
752+
///
753+
/// For clarity, having the best `Debug` impl and in case they need to be
754+
/// treated differently at some point, we store whether the `EOF` is
755+
/// considered because an actual EOF happened, or because we encoundered
756+
/// an IO error.
757+
/// This has no additional runtime cost.
758+
eof: ReaderEofState,
759+
}
760+
761+
/// Whether EOF of the underlying reader has been reached or not.
762+
///
763+
/// IO errors on the underlying reader will be considered as an EOF for
764+
/// subsequent read attempts, as it would be incorrect to keep on trying
765+
/// to read when the underlying reader has broken.
766+
///
767+
/// For clarity, having the best `Debug` impl and in case they need to be
768+
/// treated differently at some point, we store whether the `EOF` is
769+
/// considered because an actual EOF happened, or because we encoundered
770+
/// an IO error
771+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
772+
enum ReaderEofState {
773+
NotEof,
774+
Eof,
775+
IOError,
745776
}
746777

747778
/// Headers encapsulates any data associated with the headers of CSV data.
@@ -798,7 +829,7 @@ impl<R: io::Read> Reader<R> {
798829
cur_pos: Position::new(),
799830
first: false,
800831
seeked: false,
801-
eof: false,
832+
eof: ReaderEofState::NotEof,
802833
},
803834
}
804835
}
@@ -1598,13 +1629,17 @@ impl<R: io::Read> Reader<R> {
15981629

15991630
record.clear();
16001631
record.set_position(Some(self.state.cur_pos.clone()));
1601-
if self.state.eof {
1632+
if self.state.eof != ReaderEofState::NotEof {
16021633
return Ok(false);
16031634
}
16041635
let (mut outlen, mut endlen) = (0, 0);
16051636
loop {
16061637
let (res, nin, nout, nend) = {
1607-
let input = self.rdr.fill_buf()?;
1638+
let input_res = self.rdr.fill_buf();
1639+
if input_res.is_err() {
1640+
self.state.eof = ReaderEofState::IOError;
1641+
}
1642+
let input = input_res?;
16081643
let (fields, ends) = record.as_parts();
16091644
self.core.read_record(
16101645
input,
@@ -1636,7 +1671,7 @@ impl<R: io::Read> Reader<R> {
16361671
return Ok(true);
16371672
}
16381673
End => {
1639-
self.state.eof = true;
1674+
self.state.eof = ReaderEofState::Eof;
16401675
return Ok(false);
16411676
}
16421677
}
@@ -1716,7 +1751,7 @@ impl<R: io::Read> Reader<R> {
17161751
/// }
17171752
/// ```
17181753
pub fn is_done(&self) -> bool {
1719-
self.state.eof
1754+
self.state.eof != ReaderEofState::NotEof
17201755
}
17211756

17221757
/// Returns true if and only if this reader has been configured to
@@ -1817,7 +1852,7 @@ impl<R: io::Read + io::Seek> Reader<R> {
18171852
self.core.reset();
18181853
self.core.set_line(pos.line());
18191854
self.state.cur_pos = pos;
1820-
self.state.eof = false;
1855+
self.state.eof = ReaderEofState::NotEof;
18211856
Ok(())
18221857
}
18231858

@@ -1845,7 +1880,7 @@ impl<R: io::Read + io::Seek> Reader<R> {
18451880
self.core.reset();
18461881
self.core.set_line(pos.line());
18471882
self.state.cur_pos = pos;
1848-
self.state.eof = false;
1883+
self.state.eof = ReaderEofState::NotEof;
18491884
Ok(())
18501885
}
18511886
}

tests/tests.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
#![allow(dead_code)]
22

3+
use csv::Reader;
4+
35
use std::env;
4-
use std::io::{self, Write};
6+
use std::io::{self, Read, Write};
57
use std::path::PathBuf;
68
use std::process::{self, Command};
79

@@ -338,6 +340,23 @@ fn tutorial_perf_core_01() {
338340
assert_eq!(out.stdout(), "11\n");
339341
}
340342

343+
#[test]
344+
fn no_infinite_loop_on_io_errors() {
345+
struct FailingRead;
346+
impl Read for FailingRead {
347+
fn read(&mut self, _buf: &mut [u8]) -> io::Result<usize> {
348+
Err(io::Error::new(io::ErrorKind::Other, "Broken reader"))
349+
}
350+
}
351+
352+
let mut record_results = Reader::from_reader(FailingRead).into_records();
353+
let first_result = record_results.next();
354+
assert!(
355+
matches!(&first_result, Some(Err(e)) if matches!(e.kind(), csv::ErrorKind::Io(_)))
356+
);
357+
assert!(record_results.next().is_none());
358+
}
359+
341360
// Helper functions follow.
342361

343362
/// Return the target/debug directory path.

0 commit comments

Comments
 (0)