@@ -1603,28 +1603,33 @@ impl<'a> Tokenizer<'a> {
16031603 ) -> Result < Option < Token > , TokenizerError > {
16041604 let mut s = String :: new ( ) ;
16051605 let mut nested = 1 ;
1606- let mut last_ch = ' ' ;
1606+ let supports_nested_comments = self . dialect . supports_nested_comments ( ) ;
16071607
16081608 loop {
16091609 match chars. next ( ) {
1610- Some ( ch) => {
1611- if last_ch == '/' && ch == '*' {
1612- nested += 1 ;
1613- } else if last_ch == '*' && ch == '/' {
1614- nested -= 1 ;
1615- if nested == 0 {
1616- s. pop ( ) ;
1617- break Ok ( Some ( Token :: Whitespace ( Whitespace :: MultiLineComment ( s) ) ) ) ;
1618- }
1610+ Some ( '/' ) if matches ! ( chars. peek( ) , Some ( '*' ) ) && supports_nested_comments => {
1611+ chars. next ( ) ; // consume the '*'
1612+ s. push ( '/' ) ;
1613+ s. push ( '*' ) ;
1614+ nested += 1 ;
1615+ }
1616+ Some ( '*' ) if matches ! ( chars. peek( ) , Some ( '/' ) ) => {
1617+ chars. next ( ) ; // consume the '/'
1618+ nested -= 1 ;
1619+ if nested == 0 {
1620+ break Ok ( Some ( Token :: Whitespace ( Whitespace :: MultiLineComment ( s) ) ) ) ;
16191621 }
1622+ s. push ( '*' ) ;
1623+ s. push ( '/' ) ;
1624+ }
1625+ Some ( ch) => {
16201626 s. push ( ch) ;
1621- last_ch = ch;
16221627 }
16231628 None => {
16241629 break self . tokenizer_error (
16251630 chars. location ( ) ,
16261631 "Unexpected EOF while in a multi-line comment" ,
1627- )
1632+ ) ;
16281633 }
16291634 }
16301635 }
@@ -2466,18 +2471,90 @@ mod tests {
24662471
// Table-driven test: each case pairs an SQL string with the exact token list
// expected from tokenizing it with GenericDialect. Every input contains a block
// comment that itself contains further block comments, and every expectation
// has the outermost comment collapsed into a single MultiLineComment token
// whose text omits only the outermost opening and closing delimiters.
// NOTE(review): this span reads as a rendered diff; the line carrying a minus
// marker appears to be the removed pre-change statement - TODO confirm.
24672472 #[ test]
24682473 fn tokenize_nested_multiline_comment ( ) {
2469- let sql = String :: from ( "0/*multi-line\n * \n /* comment \n /*comment*/*/ */ /comment*/1" ) ;
2474+ let dialect = GenericDialect { } ;
2475+ let test_cases = vec ! [
// Case 1: the outer comment is expected to close before the trailing text, so
// that tail is expected as Space, Div, the COMMENT keyword word, Mul and Div.
2476+ (
2477+ "0/*multi-line\n * \n /* comment \n /*comment*/*/ */ /comment*/1" ,
2478+ vec![
2479+ Token :: Number ( "0" . to_string( ) , false ) ,
2480+ Token :: Whitespace ( Whitespace :: MultiLineComment (
2481+ "multi-line\n * \n /* comment \n /*comment*/*/ " . into( ) ,
2482+ ) ) ,
2483+ Token :: Whitespace ( Whitespace :: Space ) ,
2484+ Token :: Div ,
2485+ Token :: Word ( Word {
2486+ value: "comment" . to_string( ) ,
2487+ quote_style: None ,
2488+ keyword: Keyword :: COMMENT ,
2489+ } ) ,
2490+ Token :: Mul ,
2491+ Token :: Div ,
2492+ Token :: Number ( "1" . to_string( ) , false ) ,
2493+ ] ,
2494+ ) ,
// Case 2: here the very last closing delimiter is expected to end the outer
// comment, so only the numbers 0 and 1 surround the comment token.
2495+ (
2496+ "0/*multi-line\n * \n /* comment \n /*comment/**/ */ /comment*/*/1" ,
2497+ vec![
2498+ Token :: Number ( "0" . to_string( ) , false ) ,
2499+ Token :: Whitespace ( Whitespace :: MultiLineComment (
2500+ "multi-line\n * \n /* comment \n /*comment/**/ */ /comment*/" . into( ) ,
2501+ ) ) ,
2502+ Token :: Number ( "1" . to_string( ) , false ) ,
2503+ ] ,
2504+ ) ,
// Case 3: a single level of nesting placed directly between two numbers, with
// no whitespace separating the comment from its neighbours.
2505+ (
2506+ "SELECT 1/* a /* b */ c */0" ,
2507+ vec![
2508+ Token :: make_keyword( "SELECT" ) ,
2509+ Token :: Whitespace ( Whitespace :: Space ) ,
2510+ Token :: Number ( "1" . to_string( ) , false ) ,
2511+ Token :: Whitespace ( Whitespace :: MultiLineComment ( " a /* b */ c " . to_string( ) ) ) ,
2512+ Token :: Number ( "0" . to_string( ) , false ) ,
2513+ ] ,
2514+ ) ,
2515+ ] ;
2516+
// tokenize() is unwrapped, so a tokenizer error fails the test immediately.
// compare is defined outside this view; presumably it asserts that the two
// token lists are equal - verify against its definition.
2517+ for ( sql, expected) in test_cases {
2518+ let tokens = Tokenizer :: new ( & dialect, sql) . tokenize ( ) . unwrap ( ) ;
2519+ compare ( expected, tokens) ;
2520+ }
2521+ }
2522+
// Checks a nested block comment with an empty body under GenericDialect: the
// expected MultiLineComment text is the inner comment kept verbatim, including
// its own delimiters, and the numbers 1 and 0 sit directly on either side.
// NOTE(review): this span reads as a rendered diff; the line carrying a minus
// marker appears to be the removed pre-change statement - TODO confirm.
2523+ #[ test]
2524+ fn tokenize_nested_multiline_comment_empty ( ) {
2525+ let sql = "select 1/*/**/*/0" ;
24702526
24712527 let dialect = GenericDialect { } ;
2472- let tokens = Tokenizer :: new ( & dialect, & sql) . tokenize ( ) . unwrap ( ) ;
// Unwrapping here means a tokenizer error fails the test before any comparison.
2528+ let tokens = Tokenizer :: new ( & dialect, sql) . tokenize ( ) . unwrap ( ) ;
24732529 let expected = vec ! [
2530+ Token :: make_keyword( "select" ) ,
2531+ Token :: Whitespace ( Whitespace :: Space ) ,
2532+ Token :: Number ( "1" . to_string( ) , false ) ,
2533+ Token :: Whitespace ( Whitespace :: MultiLineComment ( "/**/" . to_string( ) ) ) ,
24742534 Token :: Number ( "0" . to_string( ) , false ) ,
2535+ ] ;
2536+
// compare is defined outside this view; presumably it asserts that the two
// token lists are equal - verify against its definition.
2537+ compare ( expected, tokens) ;
2538+ }
2539+
// Tokenizes a doubly opened block comment with SQLiteDialect and expects the
// comment to end at the first closing delimiter: the inner opening delimiter
// stays inside the comment text, and the leftover star and slash are expected
// as separate Mul and Div tokens ahead of the final number.
// NOTE(review): the test name implies SQLiteDialect reports no support for
// nested comments; that dialect setting is not visible here - confirm.
// NOTE(review): this span reads as a rendered diff; lines carrying a minus
// marker appear to be removed pre-change statements - TODO confirm.
2540+ #[ test]
2541+ fn tokenize_nested_comments_if_not_supported ( ) {
2542+ let dialect = SQLiteDialect { } ;
2543+ let sql = "SELECT 1/*/* nested comment */*/0" ;
// The Result is kept as-is here and only unwrapped at the comparison below.
2544+ let tokens = Tokenizer :: new ( & dialect, sql) . tokenize ( ) ;
2545+ let expected = vec ! [
2546+ Token :: make_keyword( "SELECT" ) ,
2547+ Token :: Whitespace ( Whitespace :: Space ) ,
2548+ Token :: Number ( "1" . to_string( ) , false ) ,
24752549 Token :: Whitespace ( Whitespace :: MultiLineComment (
2476- "multi-line \n * \n /* comment \n /* comment*/*/ */ /comment " . to_string( ) ,
2550+ "/* nested comment " . to_string( ) ,
24772551 ) ) ,
2478- Token :: Number ( "1" . to_string( ) , false ) ,
2552+ Token :: Mul ,
2553+ Token :: Div ,
2554+ Token :: Number ( "0" . to_string( ) , false ) ,
24792555 ] ;
2480- compare ( expected, tokens) ;
2556+
// Unwrapping at this point makes a tokenizer error fail the test; compare is
// defined outside this view and presumably asserts equality - verify.
2557+ compare ( expected, tokens. unwrap ( ) ) ;
24812558 }
24822559
24832560 #[ test]
0 commit comments