The following
EBNF-like
grammar (for
Niklaus Wirth's
PL/0 programming language, from
Algorithms + Data Structures = Programs) is in
LL(1) form: program = block "." . block = ["const" ident "=" number {"," ident "=" number} ";"] ["var" ident {"," ident} ";"] {"procedure" ident ";" block ";"} statement . statement = ident ":=" expression | "call" ident | "begin" statement {";" statement } "end" | "if" condition "then" statement | "while" condition "do" statement . condition = "odd" expression | expression ("="|"#"|""|">=") expression . expression = ["+"|"-"] term {("+"|"-") term} . term = factor {("*"|"/") factor} . factor = ident | number | "(" expression ")" .
Terminals are expressed in quotes. Each
nonterminal is defined by a rule in the grammar, except for
ident and
number, which are assumed to be implicitly defined.
C implementation What follows is an implementation of a recursive descent parser for the above language in
C. The parser reads in source code, and exits with an error message if the code fails to parse, exiting silently if the code parses correctly. Notice how closely the predictive parser below mirrors the grammar above. There is a procedure for each nonterminal in the grammar. Parsing descends in a top-down manner until the final nonterminal has been processed. The program fragment depends the functions peeksym, which peeks at the current symbol; consumesym, which consumes the symbol to move to the next; and error, which displays an error message. These functions are assumed to be provided by the lexer. extern void error(const char msg[]); extern void consumesym(); typedef enum Symbol { ident, number, lparen, rparen, times, slash, plus, minus, eql, neq, lss, leq, gtr, geq, callsym, beginsym, semicolon, endsym, ifsym, whilesym, becomes, thensym, dosym, constsym, comma, varsym, procsym, period, oddsym } Symbol; extern Symbol peeksym(); bool accept(Symbol s) { if (peeksym() == s) { consumesym(); return true; } return false; } bool expect(Symbol s) { if (accept(s)) { return true; } error("expect: unexpected symbol"); return false; } void factor() { if (accept(ident) || accept(number)) { return; } if (accept(lparen)) { expression(); expect(rparen); } else { error("factor: syntax error"); consumesym(); } } void term() { factor(); while (peeksym() == times || peeksym() == slash) { consumesym(); factor(); } } void expression() { if (peeksym() == plus || peeksym() == minus) { consumesym(); } term(); while (peeksym() == plus || peeksym() == minus) { consumesym(); term(); } } void condition() { if (accept(oddsym)) { expression(); return; } expression(); if (peeksym() == eql || peeksym() == neq || peeksym() == lss || peeksym() == leq || peeksym() == gtr || peeksym() == geq) { consumesym(); expression(); } else { error("condition: invalid operator"); consumesym(); } } void statement() { if (accept(ident)) { expect(becomes); expression(); } else if (accept(callsym)) { expect(ident); } else if (accept(beginsym)) { do { statement(); } while (accept(semicolon)); expect(endsym); } else if (accept(ifsym)) { condition(); expect(thensym); statement(); } else if (accept(whilesym)) { condition(); expect(dosym); statement(); } else { error("statement: syntax error"); consumesym(); } } void block() { if (accept(constsym)) { do { expect(ident); expect(eql); expect(number); } while (accept(comma)); expect(semicolon); } if (accept(varsym)) { do { expect(ident); } while (accept(comma)); expect(semicolon); } while (accept(procsym)) { expect(ident); expect(semicolon); block(); expect(semicolon); } statement(); } void program() { block(); expect(period); } == Examples ==