|
465 | 465 | for the respective Unicode encoding form.
|
466 | 466 | \indextext{character set|)}
|
467 | 467 |
|
| 468 | +\rSec1[lex.comment]{Comments} |
| 469 | + |
| 470 | +\pnum |
| 471 | +\indextext{comment|(}% |
| 472 | +\indextext{comment!\tcode{/*} \tcode{*/}}% |
| 473 | +\indextext{comment!\tcode{//}}% |
| 474 | +The characters \tcode{/*} start a comment, which terminates with the |
| 475 | +characters \tcode{*/}. These comments do not nest. |
| 476 | +\indextext{comment!\tcode{//}}% |
| 477 | +The characters \tcode{//} start a comment, which terminates immediately before the |
| 478 | +next new-line character. If there is a form-feed or a vertical-tab |
| 479 | +character in such a comment, only whitespace characters shall appear |
| 480 | +between it and the new-line that terminates the comment; no diagnostic |
| 481 | +is required. |
| 482 | +\begin{note} |
| 483 | +The comment characters \tcode{//}, \tcode{/*}, |
| 484 | +and \tcode{*/} have no special meaning within a \tcode{//} comment and |
| 485 | +are treated just like other characters. Similarly, the comment |
| 486 | +characters \tcode{//} and \tcode{/*} have no special meaning within a |
| 487 | +\tcode{/*} comment. |
| 488 | +\end{note} |
| 489 | +\indextext{comment|)} |
| 490 | + |
468 | 491 | \rSec1[lex.pptoken]{Preprocessing tokens}
|
469 | 492 |
|
470 | 493 | \indextext{token!preprocessing|(}%
|
471 | 494 | \begin{bnf}
|
472 | 495 | \nontermdef{preprocessing-token}\br
|
473 |
| - header-name\br |
474 | 496 | import-keyword\br
|
475 | 497 | module-keyword\br
|
476 | 498 | export-keyword\br
|
477 |
| - identifier\br |
| 499 | + header-name\br |
478 | 500 | pp-number\br
|
| 501 | + preprocessing-op-or-punc\br |
| 502 | + identifier\br |
479 | 503 | character-literal\br
|
480 | 504 | user-defined-character-literal\br
|
481 | 505 | string-literal\br
|
482 | 506 | user-defined-string-literal\br
|
483 |
| - preprocessing-op-or-punc\br |
484 | 507 | \textnormal{each non-whitespace character that cannot be one of the above}
|
485 | 508 | \end{bnf}
|
486 | 509 |
|
487 |
| -\pnum |
488 |
| -Each preprocessing token that is converted to a token\iref{lex.token} |
489 |
| -shall have the lexical form of a keyword, an identifier, a literal, |
490 |
| -or an operator or punctuator. |
491 |
| - |
492 | 510 | \pnum
|
493 | 511 | A preprocessing token is the minimal lexical element of the language in translation
|
494 | 512 | phases 3 through 6.
|
|
523 | 541 | between the quotation characters in a character literal or
|
524 | 542 | string literal.
|
525 | 543 |
|
| 544 | +\pnum |
| 545 | +Each preprocessing token that is converted to a token\iref{lex.token} |
| 546 | +shall have the lexical form of a keyword, an identifier, a literal, |
| 547 | +or an operator or punctuator. |
| 548 | + |
| 549 | +\pnum |
| 550 | +The \grammarterm{import-keyword} is produced |
| 551 | +by preprocessing an \keyword{import} directive\iref{cpp.import}, |
| 552 | +the \grammarterm{module-keyword} is produced |
| 553 | +by preprocessing a \keyword{module} directive\iref{cpp.module}, and |
| 554 | +the \grammarterm{export-keyword} is produced |
| 555 | +by preprocessing either of the previous two directives. |
| 556 | +\begin{note} |
| 557 | +None has any observable spelling. |
| 558 | +\end{note} |
| 559 | + |
526 | 560 | \pnum
|
527 | 561 | If the input stream has been parsed into preprocessing tokens up to a
|
528 | 562 | given character:
|
|
562 | 596 | \end{itemize}
|
563 | 597 | \end{itemize}
|
564 | 598 |
|
| 599 | +\pnum |
565 | 600 | \begin{example}
|
566 | 601 | \begin{codeblock}
|
567 | 602 | #define R "x"
|
568 | 603 | const char* s = R"y"; // ill-formed raw string, not \tcode{"x" "y"}
|
569 | 604 | \end{codeblock}
|
570 | 605 | \end{example}
|
571 | 606 |
|
572 |
| -\pnum |
573 |
| -The \grammarterm{import-keyword} is produced |
574 |
| -by preprocessing an \keyword{import} directive\iref{cpp.import}, |
575 |
| -the \grammarterm{module-keyword} is produced |
576 |
| -by preprocessing a \keyword{module} directive\iref{cpp.module}, and |
577 |
| -the \grammarterm{export-keyword} is produced |
578 |
| -by preprocessing either of the previous two directives. |
579 |
| -\begin{note} |
580 |
| -None has any observable spelling. |
581 |
| -\end{note} |
582 |
| - |
583 | 607 | \pnum
|
584 | 608 | \begin{example}
|
585 | 609 | The program fragment \tcode{0xe+foo} is parsed as a
|
|
602 | 626 | \end{example}
|
603 | 627 | \indextext{token!preprocessing|)}
|
604 | 628 |
|
605 |
| -\rSec1[lex.digraph]{Alternative tokens} |
606 |
| - |
607 |
| -\pnum |
608 |
| -\indextext{token!alternative|(}% |
609 |
| -Alternative token representations are provided for some operators and |
610 |
| -punctuators. |
611 |
| -\begin{footnote} |
612 |
| -\indextext{digraph}% |
613 |
| -These include ``digraphs'' and additional reserved words. The term |
614 |
| -``digraph'' (token consisting of two characters) is not perfectly |
615 |
| -descriptive, since one of the alternative \grammarterm{preprocessing-token}s is |
616 |
| -\tcode{\%:\%:} and of course several primary tokens contain two |
617 |
| -characters. Nonetheless, those alternative tokens that aren't lexical |
618 |
| -keywords are colloquially known as ``digraphs''. |
619 |
| -\end{footnote} |
620 |
| - |
621 |
| -\pnum |
622 |
| -In all respects of the language, each alternative token behaves the |
623 |
| -same, respectively, as its primary token, except for its spelling. |
624 |
| -\begin{footnote} |
625 |
| -Thus the ``stringized'' values\iref{cpp.stringize} of |
626 |
| -\tcode{[} and \tcode{<:} will be different, maintaining the source |
627 |
| -spelling, but the tokens can otherwise be freely interchanged. |
628 |
| -\end{footnote} |
629 |
| -The set of alternative tokens is defined in |
630 |
| -\tref{lex.digraph}. |
631 |
| - |
632 |
| -\begin{tokentable}{Alternative tokens}{lex.digraph}{Alternative}{Primary} |
633 |
| -\tcode{<\%} & \tcode{\{} & |
634 |
| -\keyword{and} & \tcode{\&\&} & |
635 |
| -\keyword{and_eq} & \tcode{\&=} \\ \rowsep |
636 |
| -\tcode{\%>} & \tcode{\}} & |
637 |
| -\keyword{bitor} & \tcode{|} & |
638 |
| -\keyword{or_eq} & \tcode{|=} \\ \rowsep |
639 |
| -\tcode{<:} & \tcode{[} & |
640 |
| -\keyword{or} & \tcode{||} & |
641 |
| -\keyword{xor_eq} & \tcode{\caret=} \\ \rowsep |
642 |
| -\tcode{:>} & \tcode{]} & |
643 |
| -\keyword{xor} & \tcode{\caret} & |
644 |
| -\keyword{not} & \tcode{!} \\ \rowsep |
645 |
| -\tcode{\%:} & \tcode{\#} & |
646 |
| -\keyword{compl} & \tcode{\~} & |
647 |
| -\keyword{not_eq} & \tcode{!=} \\ \rowsep |
648 |
| -\tcode{\%:\%:} & \tcode{\#\#} & |
649 |
| -\keyword{bitand} & \tcode{\&} & |
650 |
| - & \\ |
651 |
| -\end{tokentable}% |
652 |
| -\indextext{token!alternative|)} |
653 |
| - |
654 |
| -\rSec1[lex.token]{Tokens} |
655 |
| - |
656 |
| -\indextext{token|(}% |
657 |
| -\begin{bnf} |
658 |
| -\nontermdef{token}\br |
659 |
| - identifier\br |
660 |
| - keyword\br |
661 |
| - literal\br |
662 |
| - operator-or-punctuator |
663 |
| -\end{bnf} |
664 |
| - |
665 |
| -\pnum |
666 |
| -\indextext{\idxgram{token}}% |
667 |
| -There are five kinds of tokens: identifiers, keywords, literals,% |
668 |
| -\begin{footnote} |
669 |
| -Literals include strings and character and numeric literals. |
670 |
| -\end{footnote} |
671 |
| -operators, and other separators. |
672 |
| -\indextext{whitespace}% |
673 |
| -Blanks, horizontal and vertical tabs, newlines, formfeeds, and comments |
674 |
| -(collectively, ``whitespace''), as described below, are ignored except |
675 |
| -as they serve to separate tokens. |
676 |
| -\begin{note} |
677 |
| -Whitespace can separate otherwise adjacent identifiers, keywords, numeric |
678 |
| -literals, and alternative tokens containing alphabetic characters. |
679 |
| -\end{note} |
680 |
| -\indextext{token|)} |
681 |
| - |
682 |
| -\rSec1[lex.comment]{Comments} |
683 |
| - |
684 |
| -\pnum |
685 |
| -\indextext{comment|(}% |
686 |
| -\indextext{comment!\tcode{/*} \tcode{*/}}% |
687 |
| -\indextext{comment!\tcode{//}}% |
688 |
| -The characters \tcode{/*} start a comment, which terminates with the |
689 |
| -characters \tcode{*/}. These comments do not nest. |
690 |
| -\indextext{comment!\tcode{//}}% |
691 |
| -The characters \tcode{//} start a comment, which terminates immediately before the |
692 |
| -next new-line character. If there is a form-feed or a vertical-tab |
693 |
| -character in such a comment, only whitespace characters shall appear |
694 |
| -between it and the new-line that terminates the comment; no diagnostic |
695 |
| -is required. |
696 |
| -\begin{note} |
697 |
| -The comment characters \tcode{//}, \tcode{/*}, |
698 |
| -and \tcode{*/} have no special meaning within a \tcode{//} comment and |
699 |
| -are treated just like other characters. Similarly, the comment |
700 |
| -characters \tcode{//} and \tcode{/*} have no special meaning within a |
701 |
| -\tcode{/*} comment. |
702 |
| -\end{note} |
703 |
| -\indextext{comment|)} |
704 |
| - |
705 | 629 | \rSec1[lex.header]{Header names}
|
706 | 630 |
|
707 | 631 | \indextext{header!name|(}%
|
|
791 | 715 | a \grammarterm{floating-point-literal} token.%
|
792 | 716 | \indextext{number!preprocessing|)}
|
793 | 717 |
|
| 718 | +\rSec1[lex.operators]{Operators and punctuators} |
| 719 | + |
| 720 | +\pnum |
| 721 | +\indextext{operator|(}% |
| 722 | +\indextext{punctuator|(}% |
| 723 | +The lexical representation of \Cpp{} programs includes a number of |
| 724 | +preprocessing tokens that are used in the syntax of the preprocessor or |
| 725 | +are converted into tokens for operators and punctuators: |
| 726 | + |
| 727 | +\begin{bnf} |
| 728 | +\nontermdef{preprocessing-op-or-punc}\br |
| 729 | + preprocessing-operator\br |
| 730 | + operator-or-punctuator |
| 731 | +\end{bnf} |
| 732 | + |
| 733 | +\begin{bnf} |
| 734 | +%% Ed. note: character protrusion would misalign various operators. |
| 735 | +\microtypesetup{protrusion=false}\obeyspaces |
| 736 | +\nontermdef{preprocessing-operator} \textnormal{one of}\br |
| 737 | + \terminal{\# \#\# \%: \%:\%:} |
| 738 | +\end{bnf} |
| 739 | + |
| 740 | +\begin{bnf} |
| 741 | +\microtypesetup{protrusion=false}\obeyspaces |
| 742 | +\nontermdef{operator-or-punctuator} \textnormal{one of}\br |
| 743 | + \terminal{\{ \} [ ] ( )}\br |
| 744 | + \terminal{<: :> <\% \%> ; : ...}\br |
| 745 | + \terminal{? :: . .* -> ->* \~}\br |
| 746 | + \terminal{! + - * / \% \caret{} \& |}\br |
| 747 | + \terminal{= += -= *= /= \%= \caret{}= \&= |=}\br |
| 748 | + \terminal{== != < > <= >= <=> \&\& ||}\br |
| 749 | + \terminal{<< >> <<= >>= ++ -- ,}\br |
| 750 | + \terminal{\keyword{and} \keyword{or} \keyword{xor} \keyword{not} \keyword{bitand} \keyword{bitor} \keyword{compl}}\br |
| 751 | + \terminal{\keyword{and_eq} \keyword{or_eq} \keyword{xor_eq} \keyword{not_eq}} |
| 752 | +\end{bnf} |
| 753 | + |
| 754 | +Each \grammarterm{operator-or-punctuator} is converted to a single token |
| 755 | +in translation phase 7\iref{lex.phases}.% |
| 756 | +\indextext{punctuator|)}% |
| 757 | +\indextext{operator|)} |
| 758 | + |
| 759 | +\rSec1[lex.digraph]{Alternative tokens} |
| 760 | + |
| 761 | +\pnum |
| 762 | +\indextext{token!alternative|(}% |
| 763 | +Alternative token representations are provided for some operators and |
| 764 | +punctuators. |
| 765 | +\begin{footnote} |
| 766 | +\indextext{digraph}% |
| 767 | +These include ``digraphs'' and additional reserved words. The term |
| 768 | +``digraph'' (token consisting of two characters) is not perfectly |
| 769 | +descriptive, since one of the alternative \grammarterm{preprocessing-token}s is |
| 770 | +\tcode{\%:\%:} and of course several primary tokens contain two |
| 771 | +characters. Nonetheless, those alternative tokens that aren't lexical |
| 772 | +keywords are colloquially known as ``digraphs''. |
| 773 | +\end{footnote} |
| 774 | + |
| 775 | +\pnum |
| 776 | +In all respects of the language, each alternative token behaves the |
| 777 | +same, respectively, as its primary token, except for its spelling. |
| 778 | +\begin{footnote} |
| 779 | +Thus the ``stringized'' values\iref{cpp.stringize} of |
| 780 | +\tcode{[} and \tcode{<:} will be different, maintaining the source |
| 781 | +spelling, but the tokens can otherwise be freely interchanged. |
| 782 | +\end{footnote} |
| 783 | +The set of alternative tokens is defined in |
| 784 | +\tref{lex.digraph}. |
| 785 | + |
| 786 | +\begin{tokentable}{Alternative tokens}{lex.digraph}{Alternative}{Primary} |
| 787 | +\tcode{<\%} & \tcode{\{} & |
| 788 | +\keyword{and} & \tcode{\&\&} & |
| 789 | +\keyword{and_eq} & \tcode{\&=} \\ \rowsep |
| 790 | +\tcode{\%>} & \tcode{\}} & |
| 791 | +\keyword{bitor} & \tcode{|} & |
| 792 | +\keyword{or_eq} & \tcode{|=} \\ \rowsep |
| 793 | +\tcode{<:} & \tcode{[} & |
| 794 | +\keyword{or} & \tcode{||} & |
| 795 | +\keyword{xor_eq} & \tcode{\caret=} \\ \rowsep |
| 796 | +\tcode{:>} & \tcode{]} & |
| 797 | +\keyword{xor} & \tcode{\caret} & |
| 798 | +\keyword{not} & \tcode{!} \\ \rowsep |
| 799 | +\tcode{\%:} & \tcode{\#} & |
| 800 | +\keyword{compl} & \tcode{\~} & |
| 801 | +\keyword{not_eq} & \tcode{!=} \\ \rowsep |
| 802 | +\tcode{\%:\%:} & \tcode{\#\#} & |
| 803 | +\keyword{bitand} & \tcode{\&} & |
| 804 | + & \\ |
| 805 | +\end{tokentable}% |
| 806 | +\indextext{token!alternative|)} |
| 807 | + |
794 | 808 | \rSec1[lex.name]{Identifiers}
|
795 | 809 |
|
796 | 810 | \indextext{identifier|(}%
|
|
912 | 926 | \end{itemize}%
|
913 | 927 | \indextext{identifier|)}
|
914 | 928 |
|
| 929 | +\rSec1[lex.token]{Tokens} |
| 930 | + |
| 931 | +\indextext{token|(}% |
| 932 | +\begin{bnf} |
| 933 | +\nontermdef{token}\br |
| 934 | + identifier\br |
| 935 | + keyword\br |
| 936 | + literal\br |
| 937 | + operator-or-punctuator |
| 938 | +\end{bnf} |
| 939 | + |
| 940 | +\pnum |
| 941 | +\indextext{\idxgram{token}}% |
| 942 | +There are five kinds of tokens: identifiers, keywords, literals,% |
| 943 | +\begin{footnote} |
| 944 | +Literals include strings and character and numeric literals. |
| 945 | +\end{footnote} |
| 946 | +operators, and other separators. |
| 947 | +\indextext{whitespace}% |
| 948 | +Blanks, horizontal and vertical tabs, newlines, formfeeds, and comments |
| 949 | +(collectively, ``whitespace''), as described below, are ignored except |
| 950 | +as they serve to separate tokens. |
| 951 | +\begin{note} |
| 952 | +Whitespace can separate otherwise adjacent identifiers, keywords, numeric |
| 953 | +literals, and alternative tokens containing alphabetic characters. |
| 954 | +\end{note} |
| 955 | +\indextext{token|)} |
| 956 | + |
915 | 957 | \rSec1[lex.key]{Keywords}
|
916 | 958 |
|
917 | 959 | \begin{bnf}
|
|
1036 | 1078 | \indextext{keyword|)}%
|
1037 | 1079 |
|
1038 | 1080 |
|
1039 |
| -\rSec1[lex.operators]{Operators and punctuators} |
1040 |
| - |
1041 |
| -\pnum |
1042 |
| -\indextext{operator|(}% |
1043 |
| -\indextext{punctuator|(}% |
1044 |
| -The lexical representation of \Cpp{} programs includes a number of |
1045 |
| -preprocessing tokens that are used in the syntax of the preprocessor or |
1046 |
| -are converted into tokens for operators and punctuators: |
1047 |
| - |
1048 |
| -\begin{bnf} |
1049 |
| -\nontermdef{preprocessing-op-or-punc}\br |
1050 |
| - preprocessing-operator\br |
1051 |
| - operator-or-punctuator |
1052 |
| -\end{bnf} |
1053 |
| - |
1054 |
| -\begin{bnf} |
1055 |
| -%% Ed. note: character protrusion would misalign various operators. |
1056 |
| -\microtypesetup{protrusion=false}\obeyspaces |
1057 |
| -\nontermdef{preprocessing-operator} \textnormal{one of}\br |
1058 |
| - \terminal{\# \#\# \%: \%:\%:} |
1059 |
| -\end{bnf} |
1060 |
| - |
1061 |
| -\begin{bnf} |
1062 |
| -\microtypesetup{protrusion=false}\obeyspaces |
1063 |
| -\nontermdef{operator-or-punctuator} \textnormal{one of}\br |
1064 |
| - \terminal{\{ \} [ ] ( )}\br |
1065 |
| - \terminal{<: :> <\% \%> ; : ...}\br |
1066 |
| - \terminal{? :: . .* -> ->* \~}\br |
1067 |
| - \terminal{! + - * / \% \caret{} \& |}\br |
1068 |
| - \terminal{= += -= *= /= \%= \caret{}= \&= |=}\br |
1069 |
| - \terminal{== != < > <= >= <=> \&\& ||}\br |
1070 |
| - \terminal{<< >> <<= >>= ++ -- ,}\br |
1071 |
| - \terminal{\keyword{and} \keyword{or} \keyword{xor} \keyword{not} \keyword{bitand} \keyword{bitor} \keyword{compl}}\br |
1072 |
| - \terminal{\keyword{and_eq} \keyword{or_eq} \keyword{xor_eq} \keyword{not_eq}} |
1073 |
| -\end{bnf} |
1074 |
| - |
1075 |
| -Each \grammarterm{operator-or-punctuator} is converted to a single token |
1076 |
| -in translation phase 7\iref{lex.phases}.% |
1077 |
| -\indextext{punctuator|)}% |
1078 |
| -\indextext{operator|)} |
1079 |
| - |
1080 | 1081 | \rSec1[lex.literal]{Literals}%
|
1081 | 1082 | \indextext{literal|(}
|
1082 | 1083 |
|
|
0 commit comments