Added rule to ecpg lexer to accept "Unicode surrogate pair in extended quoted
author Michael Meskes <[email protected]>
Thu, 23 Dec 2010 11:41:12 +0000 (12:41 +0100)
committer Michael Meskes <[email protected]>
Thu, 23 Dec 2010 19:37:42 +0000 (20:37 +0100)
string". This is not really needed because the string gets copied to the output
untranslated anyway, but by adding this rule the lexer stays in sync with the
backend lexer.

src/interfaces/ecpg/preproc/pgc.l

index b7e46866f72f1ffb677553008986bc51732f04bb..f528f214c0e842ce3e2d3df5180bcb765927b5f1 100644 (file)
@@ -146,6 +146,7 @@ xeinside        [^\\']+
 xeescape       [\\][^0-7]
 xeoctesc       [\\][0-7]{1,3}
 xehexesc       [\\]x[0-9A-Fa-f]{1,2}
+xeunicode      [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
 
 /* C version of hex number */
 xch                0[xX][0-9A-Fa-f]*
@@ -505,6 +506,7 @@ cppline         {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})(.*\\{space})*.
            }
 <xq,xqc,xn,xus>{xqinside}  { addlit(yytext, yyleng); }
 <xe>{xeinside}     { addlit(yytext, yyleng); }
+<xe>{xeunicode}        { addlit(yytext, yyleng); }
 <xe>{xeescape}     { addlit(yytext, yyleng); }
 <xe>{xeoctesc}     { addlit(yytext, yyleng); }
 <xe>{xehexesc}     { addlit(yytext, yyleng); }