Subsections


B. Complete CppCC grammar


B.1 The CppCC input lexical structure

CppCC's input grammar language is case-sensitive. C or C++-style comments can be freely used anywhere in the file.

The reserved words used by CppCC are:

OPTIONS TOKEN SCANNER SPECIAL SKIP PARSER LOOKAHEAD throw catch

Note that these are only reserved words outside a c_block construct (see below).


B.2 The CppCC input syntax

<grammar_file> -> [ <options_section> ] 
                  <token_customization_section> 
                  <lexical_section> <syntax_section> 
 
<options_section> -> "OPTIONS" "{" ( <option_name> "=" <option_value> )* "}" 
 
<token_customization_section> -> [ <c_block> ] 
                                 "TOKEN" <c_token_class_id> [ <inheritance> ] 
                                 <c_block> 
 
<lexical_section> -> [ <c_block> ] "SCANNER" <c_scanner_class_id> [ <inheritance> ] 
                     "{" <scanner_decl> * "}" 
 
<scanner_decl> ->  <c_block> 
                 | [ <lexical_state_list> ] <token_kind> "{" <token_decl> * "}" 
                 | "SPECIAL" "{" <special_token_decl> * "}" 
 
<token_kind> -> "TOKEN" | "SKIP" | "MORE" | "KEYWORD" 
 
<lexical_state_list> -> "<" ( "*" | <c_state_id> ( "," <c_state_id> )* ) ">" 
 
<token_decl> ->  "<#" <c_token_id> ":" <token_regexp> ">" 
               | "<" <c_token_id> ":" <token_regexp> ">" [ <c_block> ] 
 
<special_token_decl> -> <c_token_id> 
 
<token_regexp> -> <regexp_or_list> 
 
<regexp_or_list> -> <regexp_cat_list> ( "|" <regexp_cat_list> )* 
 
<regexp_cat_list> -> <regexp_term> + 
 
<regexp_term> -> <regexp_atom> [ "+" | "?" | "*" ] 
 
<regexp_atom> ->  <c_string_literal> 
                | <c_terminal_id> 
                | <character_list> 
                | "(" <regexp_or_list> ")" 
 
<character_list> -> ["~"] "[" <character_descriptor> ( "," <character_descriptor> )* "]" 
 
<character_descriptor> -> <c_character_literal> [ "-" <c_character_literal> ] 
 
 
<syntax_section> -> [ <c_block> ] "PARSER" <c_parser_class_id> [ <inheritance> ] 
                    "{" <parser_decl> * "}" 
 
<parser_decl> ->  <c_block> 
                | <production> 
 
<production> -> ( <c_type_decl> ) <c_nonterminal_id> "(" <c_formal_args_list> ")" 
                [ <throw_clause> ] 
                "{" <c_block> <or_list> "}" 
 
<throw_clause> -> "throw" "(" [ <c_type_id> ( "," <c_type_id> )* ] ")" 
 
<or_list> -> <cat_list> ( "|" <cat_list> )* 
 
<cat_list> -> <expansion> + 
 
<expansion> -> [ <lookahead> ] [ <c_block> ]["!"] <atom> [ "+" | "?" | "*" ] 
               [ <catch_clauses> ][ <c_block> ]["!"] 
 
<lookahead> -> "LOOKAHEAD" "(" [ <integer_literal> ] [","] 
                               [ <or_list> ] [","] 
                               [ <c_expression> ] ")" 
 
<atom> ->  [ <c_identifier> "=" ] ( <nonterminal_call> | <c_terminal_id> ) 
         | "(" <or_list> ")" 
 
<nonterminal_call> -> <c_nonterminal_id> "(" <c_actual_args_list> ")" 
 
<catch_clauses> -> <catch_clause> + 
 
<catch_clause> -> "catch" "(" <exception_id_specs> ")" <c_block> 
 
<exception_id_specs> -> "..." | <c_exception_decl> ( "," <c_exception_decl> )* 
 
<c_exception_decl> -> <c_type_id> <c_exception_id> 
 
<c_terminal_id> -> "<" <c_token_id> ">" 
 
<c_token_class_id> -> c++_identifier 
 
<c_state_id> -> c++_identifier 
 
<c_token_id> -> c++_identifier 
 
<c_scanner_class_id> -> c++_identifier 
 
<c_parser_class_id> -> c++_identifier 
 
<c_nonterminal_id> -> c++_identifier 
 
<c_exception_id> -> c++_identifier 
 
<c_type_decl> -> c++_type_specification 
 
<c_block> -> "{" c++_code_with_balanced_parentheses "}" 
 
<c_expression> -> "{" c++_expression "}" 
 
<c_character_literal> -> c++_character_literal 


Alec Panovici 2003-02-01