Skip to content

added TokenList::Stream class to wrap std::istream usage and implemented alternative C I/O version #244

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Mar 2, 2023
Merged
Changes from 1 commit
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
097a765
added wrapper class "Stream" to TokenList
firewave Mar 5, 2022
c2ae449
moved some helper function into TokenList::Stream
firewave Mar 5, 2022
62acc58
simplified UTF-16 checks
firewave Mar 5, 2022
a2cc676
made simplecpp::TokenList::Stream an abstract class and moved impleme…
firewave Mar 5, 2022
07ec136
added simplecpp::TokenList::Stream implementation "FileStream" which …
firewave Mar 5, 2022
81fe8cd
use FileStream for includes as well
firewave Mar 5, 2022
4f0da78
constness
firewave Mar 5, 2022
1b52b22
made some TokenList::Stream members private
firewave Mar 5, 2022
95c747b
pulled out repeated UTF-16 character generation code
firewave Mar 5, 2022
892183e
main.cpp: added command-option "-is" to specify usage of std::istream…
firewave Mar 9, 2022
26ccd24
test.cpp: got rid of another `std::istringstream` usage
firewave Apr 14, 2022
e57f312
fixed FileStream::unget() with subsequent calls (i.e. UTF-16 encoding)
firewave Apr 19, 2022
bdc7f3c
simplified newline handling in TokenList::Stream::readChar()
firewave Apr 19, 2022
76b94e3
fixed handling of incomplete UTF-8 BOM 0xefbbbf
firewave Apr 19, 2022
cd084ae
test.cpp: added tests for incomplete UTF sequences
firewave Apr 19, 2022
797f899
keep lastCh intact when using FileStream::peek()
firewave Apr 20, 2022
c1da435
fixed uninitialized members in `FileStream`
firewave Oct 6, 2022
52b4b61
.clang-tidy: disabled `modernize-use-override` warning
firewave Oct 6, 2022
7ff8c98
fixed `readability-inconsistent-declaration-parameter-name` clean-tid…
firewave Oct 6, 2022
c9605fe
fixed `-Wshadow` Clang compiler warning
firewave Oct 6, 2022
557099c
adjusted parameter order in added `TokenList()` constructor / added s…
firewave Feb 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
simplified UTF-16 checks
  • Loading branch information
firewave committed Feb 25, 2023
commit 62acc5826dc822345898100ebc4df504c6acc844
14 changes: 8 additions & 6 deletions simplecpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,9 @@ class simplecpp::TokenList::Stream {
public:
Stream(std::istream &istr)
: istr(istr)
, bom(getAndSkipBOM())
, isUtf16(bom == 0xfeff || bom == 0xfffe)
{
bom = getAndSkipBOM();
}

int get() {
Expand All @@ -257,7 +258,7 @@ class simplecpp::TokenList::Stream {

// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
// character is a non-ASCII character then replace it with 0xff
if (bom == 0xfeff || bom == 0xfffe) {
if (isUtf16) {
const unsigned char ch2 = static_cast<unsigned char>(get());
const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch);
ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
Expand All @@ -268,7 +269,7 @@ class simplecpp::TokenList::Stream {
ch = '\n';
if (bom == 0 && static_cast<char>(peek()) == '\n')
(void)get();
else if (bom == 0xfeff || bom == 0xfffe) {
else if (isUtf16) {
int c1 = get();
int c2 = get();
int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1);
Expand All @@ -288,7 +289,7 @@ class simplecpp::TokenList::Stream {

// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
// character is a non-ASCII character then replace it with 0xff
if (bom == 0xfeff || bom == 0xfffe) {
if (isUtf16) {
(void)get();
const unsigned char ch2 = static_cast<unsigned char>(peek());
unget();
Expand All @@ -306,7 +307,7 @@ class simplecpp::TokenList::Stream {
void ungetChar()
{
unget();
if (bom == 0xfeff || bom == 0xfffe)
if (isUtf16)
unget();
}

Expand Down Expand Up @@ -339,7 +340,8 @@ class simplecpp::TokenList::Stream {
}

std::istream &istr;
unsigned short bom;
const unsigned short bom;
const bool isUtf16;
};

simplecpp::TokenList::TokenList(std::vector<std::string> &filenames) : frontToken(nullptr), backToken(nullptr), files(filenames) {}
Expand Down