00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055 #include "la_parser_writer.hh"
00056 #include "prop_registry.hh"
00057 #include "parser_spec.hh"
00058 #include "itoken_spec.hh"
00059 #include "ebnf_node_algo.hh"
00060 #include "ebnf_la_node.hh"
00061 #include "token_spec.hh"
00062
00063 void LaParserWriter::writeParser (ParserSpec &pSpec, ITokenSpec &tSpec,
00064 const string &scannerClass)
00065 {
00066 string throwClause;
00067 string includeMacro = className2macro(pSpec.className);
00068
00069 if (registry["USE_EXCEPTIONS"]) throwClause = " throw (ScanException, ParseException)";
00070
00072
00073
00074 openStream(fullPath(className2hh(pSpec.className)));
00075 line() << "#ifndef " << includeMacro;
00076 line() << "#define " << includeMacro;
00077 line();
00078 line() << "#include <string>";
00079 line() << "#include <exception>";
00080 line() << "using namespace std;";
00081 line();
00082 line() << "#include \"" << className2hh(tSpec.className) << "\"";
00083 line() << "#include \"" << className2hh(scannerClass) << "\"";
00084 if (!pSpec.preambleCode.empty())
00085 writeChunk(pSpec.preambleCode);
00086 line() << "namespace cppcc";
00087 line() << "{";
00088 line();
00089 line() << "class ParseException : public exception";
00090 line() << "{";
00091 indent();
00092 line() << "public:";
00093 indent();
00094 line();
00095 line() << "ParseException (const string &message_ = \"Parse exception\") :";
00096 line() << " message(message_)";
00097 line() << "{}";
00098 line();
00099 line() << "ParseException (const Position &pos_,";
00100 line() << " const string &message_ = \"Parse exception\") :";
00101 line() << " message(message_), pos(pos_)";
00102 line() << "{}";
00103 line();
00104 line() << "~ParseException () throw ()";
00105 line() << "{}";
00106 line();
00107 line() << "virtual operator string () const";
00108 line() << "{";
00109 indent();
00110 line() << "return string(pos) + \": \" + message;";
00111 unindent();
00112 line() << "}";
00113 line();
00114 line() << "virtual const char* what () throw ()";
00115 line() << "{";
00116 indent();
00117 line() << "return message.c_str();";
00118 unindent();
00119 line() << "}";
00120 line();
00121 line() << "virtual Position& where () throw ()";
00122 line() << "{";
00123 indent();
00124 line() << "return pos;";
00125 unindent();
00126 line() << "}";
00127 line();
00128 unindent();
00129 line() << "private:";
00130 line();
00131 indent();
00132 line() << "Position pos;";
00133 line() << "string message;";
00134 unindent();
00135 unindent();
00136 line() << "}; // end class PasreException";
00137 line();
00138 line();
00139 line() << "class " << pSpec.className << "_base";
00140 line() << "{";
00141 indent();
00142 line() << "protected:";
00143 indent();
00144 line() << "bool onParseError (ParseException &pex) { return false; }";
00145 unindent();
00146 unindent();
00147 line() << "};";
00148 line();
00149 line();
00150 line() << "class " << pSpec.className << " : public "
00151 << pSpec.className << "_base";
00152 if (!pSpec.inheritance.empty()) {
00153 ofs << ", ";
00154 writeChunk(pSpec.inheritance);
00155 }
00156 line() << "{";
00157 indent();
00158 line() << "public: // methods";
00159 indent();
00160 line();
00161 line() << pSpec.className << " (istream *in_ = NULL)" << throwClause << " :";
00162 line() << " scanner(in_), _la(0)";
00163 line() << "{}";
00164 line();
00165 line() << "// productions entry points:";
00166 line();
00167 writeProductionDecls(pSpec.productions);
00168 line();
00169 line() << "// productions LA methods:";
00170 line();
00171 writeProductionLaDecls(pSpec.productions);
00172 line();
00173 line() << "// lookahead entry points:";
00174 line();
00175 writeLaEpDecls(pSpec.productions);
00176 line();
00177 unindent();
00178 line() << "private: // methods";
00179 indent();
00180 line();
00181 line() << "void consumeToken (int tId) " << throwClause;
00182 line() << "{";
00183 indent();
00184 if (registry["DEBUG_PARSER"])
00185 line() << "cerr << \"trying to match a \" << "
00186 << tSpec.className
00187 << "::tokenNames[tId] << \"(\" << tId << \") token with "
00188 " the current la which is \" << "
00189 << tSpec.className
00190 << "::tokenNames[scanner.la()->id] << \"(\" << scanner.la()->id << \")\" << endl;";
00191 line() << "if ((token = scanner.la())->id != tId)";
00192 line() << "{";
00193 indent();
00194 line() << "ParseException pex(scanner.la()->bPos, \"Parse error.\");";
00195 line() << "if (!onParseError(pex)) throw pex;";
00196 unindent();
00197 line() << "}";
00198 line() << "scanner.unchecked_consume();";
00199 unindent();
00200 line() << "}";
00201 line();
00202 line() << "bool laConsumeToken (int tId) " << throwClause;
00203 line() << "{";
00204 indent();
00205 if (registry["DEBUG_PARSER"])
00206 line() << "cerr << \"looking ahead for a \" << "
00207 << tSpec.className
00208 << "::tokenNames[tId] << \"(\" << tId << \") token; "
00209 "the current la is \" << "
00210 << tSpec.className
00211 << "::tokenNames[scanner.la()->id] << \"(\" << scanner.la()->id << \") @ \" << (string) scanner.la()->bPos << endl;";
00212
00213
00214
00215
00216
00217 line() << "if (scanner.la()->id != tId) return false;";
00218 line() << "scanner.unchecked_consume();";
00219 line() << "return true;";
00220 unindent();
00221 line() << "}";
00222 line();
00223
00224 unindent();
00225 line() << "public: // data";
00226 indent();
00227 line();
00228 line() << scannerClass << " scanner;";
00229 line() << "int _la;";
00230 line() << tSpec.className << " *token;";
00231 line();
00232 unindent();
00233 if (!pSpec.userCode.empty()) {
00234 line() << "private: // user code";
00235 line();
00236 for (int i = 0; i < pSpec.userCode.size(); i++)
00237 writeChunk(pSpec.userCode[i]);
00238 line();
00239 }
00240 unindent();
00241 line() << "}; //end class " << pSpec.className;
00242 line();
00243 line();
00244 line() << "} //end namespace cppcc";
00245 line();
00246 line();
00247 line() << "#endif // ifndef " << includeMacro;
00248 closeStream();
00249
00251
00252
00253 openStream(fullPath(className2cc(pSpec.className)));
00254 line();
00255 line() << "#include \"" << className2hh(pSpec.className) << "\"";
00256 line();
00257 writeProductionDefs(pSpec.productions, pSpec.className, tSpec);
00258 writeLaDefs(pSpec.productions, pSpec.className, tSpec, registry);
00259 line();
00260 closeStream();
00261
00262 }
00263
00264
00265 void LaParserWriter::writeProductionDecls (vector<ProductionSpec> &prods)
00266 {
00267 string throwXtra;
00268 bool useExcepts = false;
00269
00270 if (registry["USE_EXCEPTIONS"]) {
00271 throwXtra = "ScanException, ParseException";
00272 useExcepts = true;
00273 }
00274
00275 for (int i = 0; i < prods.size(); i++)
00276 {
00277 ProductionSpec &p = prods[i];
00278 line();
00279 if (p.retType.empty()) ofs << "void";
00280 else writeChunk(p.retType);
00281 line() << " " << p.name << " (";
00282 if (p.formalArgs.empty()) ofs << ")";
00283 else writeChunk(p.formalArgs, ")");
00284 if (!p.exceptList.empty() || useExcepts) {
00285 line() << " throw (";
00286 if (!p.exceptList.empty()) {
00287 if (useExcepts)
00288 writeChunk(p.exceptList, ", ") << throwXtra << ")";
00289 else
00290 writeChunk(p.exceptList, ")");
00291 } else
00292 ofs << throwXtra << ")";
00293 }
00294 ofs << ";";
00295 }
00296
00297 }
00298
00299
00300 void LaParserWriter::writeProductionLaDecls (vector<ProductionSpec> &prods)
00301 {
00302 string throwClause;
00303
00304 if (registry["USE_EXCEPTIONS"])
00305 throwClause = " throw (ScanException)";
00306
00307 for (int i = 0; i < prods.size(); i++)
00308 {
00309 ProductionSpec &p = prods[i];
00310 if (p.expansion == NULL) {
00311 cerr << formatWarning(p.pos, string("Assuming lookahead for user-code production ") + p.name + " to be always true.") << endl;;
00312 line() << "bool __la_" << p.name << "_ ()" << throwClause << "{ return true; };";
00313 } else
00314 line() << "bool __la_" << p.name << "_ ()" << throwClause << ";";
00315 }
00316
00317 }
00318
00319
00325 class LaEpDeclWriter : public EbnfNodeAlgo
00326 {
00327 public:
00328
00329 LaEpDeclWriter (LaParserWriter &pw_, const string throwClause_) :
00330 pw(pw_), throwClause(throwClause_)
00331 {}
00332
00333 virtual bool operator() (EbnfNode &n) throw (EbnfNodeAlgoException)
00334 {
00335 EbnfLaNode &nn = dynamic_cast<EbnfLaNode&>(n);
00336
00337 if ((nn.laSpec != NULL) && (nn.laSpec->synLa != NULL))
00338 {
00339 if (nn.laSpec->ignore) {
00340 cerr << formatWarning(nn.getPos(), "Local lookahead at non-ambiguous choice point will be ignored.") << endl;
00341 return true;
00342 }
00343 NumberedLaSpec &la = dynamic_cast<NumberedLaSpec&>(*nn.laSpec);
00344 pw.line() << "bool __la_" << la.id << "_ (int depth)"
00345 << throwClause << ";";
00346 }
00347 return true;
00348 }
00349
00350 private:
00351
00352 LaParserWriter &pw;
00353 const string &throwClause;
00354 };
00355
00356
00357 void LaParserWriter::writeLaEpDecls(vector<ProductionSpec> &prods)
00358 {
00359 string throwClause;
00360 if (registry["USE_EXCEPTIONS"])
00361 throwClause = " throw (ScanException)";
00362
00363 LaEpDeclWriter x(*this, throwClause);
00364 for (int i = 0; i < prods.size(); i++)
00365 {
00366 if (prods[i].expansion != NULL) {
00367 line() << "// LA entry points for " << prods[i].name;
00368 line();
00369 prods[i].expansion->dfTraverse(x);
00370 }
00371 }
00372 line();
00373 }
00374
00375
00376 ostream& LaParserWriter::makeLaCondition (vBitset &tMask,
00377 NumberedLaSpec *la,
00378 ITokenSpec &tokens)
00379 {
00380 if (la != NULL && !la->ignore) {
00381 if (!la->semLa.empty()) {
00382 ofs << "(";
00383 writeChunk(la->semLa, ")") << " &&";
00384 }
00385 if (la->synLa != NULL) {
00386 ofs << "__la_" << la->id << "_(" << la->fixedLa << ")";
00387 }
00388 } else {
00389 if ((la != NULL) && !la->semLa.empty()) {
00390
00391 ofs << "(";
00392 writeChunk(la->semLa, ")") << " && (";
00393 }
00394 vBitset::reference ti = tMask.begin();
00395 bool first = true;
00396 for (int tid = 0; ti.neq(tMask.end()); ti.next(), tid++)
00397 if (ti.get()) {
00398 if (!first) ofs << " || ";
00399 else first = false;
00400 ofs << "(scanner.la()->id == "
00401 << tokens.className << "::" << tokens[tid].name()
00402 << ")";
00403 }
00404 if ((la != NULL) && !la->semLa.empty())
00405 ofs << ")";
00406 }
00407 return ofs;
00408 }
00409
00410
00411
00412 void LaParserWriter::writeEbnf (EbnfLaNode &n, ITokenSpec &tokens)
00413 {
00414 if (!n.startCode.empty()) {
00415 line() << "{";
00416 writeChunk(n.startCode, "}");
00417 }
00418
00419 if (n.catchList.size() != 0) {
00420 line() << "try {";
00421 indent();
00422 }
00423
00424 if (typeid(n) == typeid(EbnfOrNode)) {
00425 EbnfOrNode &nn = dynamic_cast<EbnfOrNode&>(n);
00426 line() << "if (";
00427 makeLaCondition(nn.pre->first, nn.pre->laSpec, tokens) << ") {";
00428 indent();
00429 writeEbnf(*nn.pre, tokens);
00430 unindent();
00431 line() << "} else { ";
00432 indent();
00433 writeEbnf(*nn.post, tokens);
00434 unindent();
00435 line() << "}";
00436 } else if (typeid(n) == typeid(EbnfCatNode)) {
00437 EbnfCatNode &nn = dynamic_cast<EbnfCatNode&>(n);
00438 writeEbnf(*nn.pre, tokens);
00439 writeEbnf(*nn.post, tokens);
00440 } else if (typeid(n) == typeid(EbnfStarNode)) {
00441 EbnfStarNode &nn = dynamic_cast<EbnfStarNode&>(n);
00442 line() << "while (";
00443 makeLaCondition(nn.in->first, nn.in->laSpec, tokens) << ") {";
00444 indent();
00445 writeEbnf(*nn.in, tokens);
00446 unindent();
00447 line() << "}";
00448 } else if (typeid(n) == typeid(EbnfTerminalNode)) {
00449 EbnfTerminalNode &nn = dynamic_cast<EbnfTerminalNode&>(n);
00450 line() << "consumeToken(" << tokens.className << "::" << nn.terminal.name()
00451 << ");";
00452 } else if (typeid(n) == typeid(EbnfNonterminalNode)) {
00453 EbnfNonterminalNode &nn = dynamic_cast<EbnfNonterminalNode&>(n);
00454 line();
00455 if (!nn.targetVar.empty()) writeChunk(nn.targetVar, " = ");
00456 ofs << nn.nontermName << "(";
00457 if (nn.actualArgs.empty()) ofs << ");";
00458 else writeChunk(nn.actualArgs, ");");
00459 } else if (typeid(n) == typeid(EbnfLambdaNode)) {
00460 line() << " /* lambda */";
00461 } else ASSERT(0, "Bad typeid in LaParserWriter::writeEbnf !");
00462
00463 if (n.catchList.size() != 0) {
00464 unindent();
00465 for (int i = 0; i < n.catchList.size(); i++) {
00466 if (i == 0) line () << "}";
00467 ofs << " catch (";
00468 writeChunk(n.catchList[i].exceptionDecl, ")") << " {";
00469 indent();
00470 writeChunk(n.catchList[i].code, ";");
00471 unindent();
00472 line() << "}";
00473 }
00474 }
00475
00476 if (!n.endCode.empty()) {
00477 line() << "{";
00478 writeChunk(n.endCode, "}");
00479 }
00480 }
00481
00482
00483 void LaParserWriter::writeProductionDefs (vector<ProductionSpec> &prods,
00484 const string &parserClass,
00485 ITokenSpec &tokens)
00486 {
00487 string throwXtra;
00488 bool useExcepts = false;
00489
00490 if (registry["USE_EXCEPTIONS"]) {
00491 throwXtra = "ScanException, ParseException";
00492 useExcepts = true;
00493 }
00494
00495 for (int i = 0; i < prods.size(); i++)
00496 {
00497 ProductionSpec &p = prods[i];
00498 line();
00499 if (p.retType.empty()) ofs << "void cppcc::";
00500 else writeChunk(p.retType, " cppcc::");
00501 ofs << parserClass << "::"
00502 << p.name << " (";
00503 if (p.formalArgs.empty()) ofs << ")";
00504 else writeChunk(p.formalArgs, ")");
00505 if (!p.exceptList.empty() || useExcepts) {
00506 line() << " ";
00507 ofs << " throw (";
00508 if (!p.exceptList.empty()) {
00509 if (useExcepts) writeChunk(p.exceptList, ",") << throwXtra << ")";
00510 else writeChunk(p.exceptList, ")");
00511 } else
00512 ofs << throwXtra << ")";
00513 }
00514 line() << "{";
00515 indent();
00516 if (registry["DEBUG_PARSER"])
00517 line() << "cerr << \"Entering " << p.name << ".\" << endl;";
00518 if (!p.preambleCode.empty()) writeChunk(p.preambleCode, ";");
00519 if (p.expansion != NULL)
00520 writeEbnf(dynamic_cast<EbnfLaNode&>(*p.expansion), tokens);
00521 if (registry["DEBUG_PARSER"])
00522 line() << "cerr << \"Leaving " << p.name << ".\" << endl;";
00523 unindent();
00524 line() << "}";
00525 line();
00526 line();
00527 }
00528 }
00529
00530
00531 void LaParserWriter::writeEbnfLa (EbnfLaNode &n, ITokenSpec &tokens)
00532 {
00533 if (n.forceStartCode && !n.startCode.empty()) {
00534 line() << "{";
00535 writeChunk(n.startCode, "}");
00536 }
00537
00538 if (typeid(n) == typeid(EbnfOrNode)) {
00539 EbnfOrNode &nn = dynamic_cast<EbnfOrNode&>(n);
00540 line() << "if (";
00541 makeLaCondition(nn.pre->first, nn.pre->laSpec, tokens) << ") {";
00542 indent();
00543 writeEbnfLa(*nn.pre, tokens);
00544 unindent();
00545 line() << "} else { ";
00546 indent();
00547 writeEbnfLa(*nn.post, tokens);
00548 unindent();
00549 line() << "}";
00550 } else if (typeid(n) == typeid(EbnfCatNode)) {
00551 EbnfCatNode &nn = dynamic_cast<EbnfCatNode&>(n);
00552 writeEbnfLa(*nn.pre, tokens);
00553 writeEbnfLa(*nn.post, tokens);
00554 } else if (typeid(n) == typeid(EbnfStarNode)) {
00555 EbnfStarNode &nn = dynamic_cast<EbnfStarNode&>(n);
00556 line() << "while (";
00557 makeLaCondition(nn.in->first, nn.in->laSpec, tokens) << ") {";
00558 indent();
00559 writeEbnfLa(*nn.in, tokens);
00560 unindent();
00561 line() << "}";
00562 } else if (typeid(n) == typeid(EbnfTerminalNode)) {
00563 EbnfTerminalNode &nn = dynamic_cast<EbnfTerminalNode&>(n);
00564 line() << "if (!laConsumeToken(" << tokens.className << "::"
00565 << nn.terminal.name() << ")) ";
00566 ofs << "goto __lookahead_fail_;";
00567 line() << "if (_la == 0) ";
00568 ofs << "goto __lookahead_succeed_;";
00569 } else if (typeid(n) == typeid(EbnfNonterminalNode)) {
00570 EbnfNonterminalNode &nn = dynamic_cast<EbnfNonterminalNode&>(n);
00571 line() << "if (!__la_" << nn.nontermName << "_()) ";
00572 ofs << "goto __lookahead_fail_;";
00573 line() << "if (_la == 0) ";
00574 ofs << "goto __lookahead_succeed_;";
00575 } else if (typeid(n) == typeid(EbnfLambdaNode)) {
00576 line() << " /* lambda */";
00577 } else ASSERT(0, "Bad typeid in LaParserWriter::writeEbnfLa !");
00578
00579 if (n.forceEndCode && !n.endCode.empty()) {
00580 line() << "{";
00581 writeChunk(n.endCode, "}");
00582 }
00583 }
00584
00585
00590 class LaEpDefWriter : public EbnfNodeAlgo
00591 {
00592 public:
00593
00594 LaEpDefWriter (const string &parserClass_, const string &throwClause_,
00595 ITokenSpec &tokens_, LaParserWriter &pw_,
00596 PropRegistry ®istry_) :
00597 parserClass(parserClass_), throwClause(throwClause_), tokens(tokens_),
00598 pw(pw_), registry(registry_)
00599 {}
00600
00601 virtual bool operator() (EbnfNode &n) throw (EbnfNodeAlgoException)
00602 {
00603 EbnfLaNode &nn = dynamic_cast<EbnfLaNode&>(n);
00604
00605 if ((nn.laSpec != NULL) && (nn.laSpec->synLa != NULL))
00606 {
00607 if (nn.laSpec->ignore)
00608 return true;
00609
00610 NumberedLaSpec &la = dynamic_cast<NumberedLaSpec&>(*nn.laSpec);
00611 pw.line() << "bool cppcc::" << parserClass << "::__la_" << la.id
00612 << "_ (int depth)" << throwClause;
00613 pw.line() << "{";
00614 pw.indent();
00615 if (registry["DEBUG_PARSER"])
00616 pw.line() << "cerr << \"LOOKAHEAD#" << la.id
00617 << "(\" << depth << \") ...\" << endl;";
00618 pw.line() << "int _laSave = _la;";
00619 pw.line() << "_la = depth;";
00620 pw.line() << "scanner.setMarker();";
00621 pw.line();
00622 pw.writeEbnfLa(dynamic_cast<EbnfLaNode&>(*nn.laSpec->synLa),
00623 tokens);
00624 pw.line() << "goto __lookahead_succeed_;";
00625 pw.line();
00626 pw.unindent();
00627 pw.line() << "__lookahead_fail_:";
00628 pw.indent();
00629 if (registry["DEBUG_PARSER"])
00630 pw.line() << "cerr << \"LOOKAHEAD#" << la.id
00631 << "(\" << depth << \") FAILED\" << endl;";
00632 pw.line() << "_la = _laSave;";
00633 pw.line() << "scanner.rewindToMarker();";
00634 if (registry["DEBUG_PARSER"])
00635 pw.line() << "if (_la == 0) cerr << \"Back in parsing mode.\" << endl;";
00636 pw.line() << "return false;";
00637 pw.unindent();
00638 pw.line();
00639 pw.line() << "__lookahead_succeed_:";
00640 pw.indent();
00641 if (registry["DEBUG_PARSER"])
00642 pw.line() << "cerr << \"LOOKAHEAD#" << la.id
00643 << "(\" << depth << \") SUCCEEDED\" << endl;";
00644 pw.line() << "_la = _laSave;";
00645 pw.line() << "scanner.rewindToMarker();";
00646 if (registry["DEBUG_PARSER"])
00647 pw.line() << "if (_la == 0) cerr << \"Back in parsing mode.\" << endl;";
00648 pw.line() << "return true;";
00649 pw.unindent();
00650 pw.line() << "}";
00651 pw.line();
00652 pw.line();
00653 }
00654 return true;
00655 }
00656
00657 private:
00658
00659 PropRegistry ®istry;
00660 string parserClass;
00661 string throwClause;
00662 ITokenSpec &tokens;
00663 LaParserWriter &pw;
00664 };
00665
00666
00667 void LaParserWriter::writeLaDefs (vector<ProductionSpec> &prods,
00668 const string &parserClass,
00669 ITokenSpec &tokens, PropRegistry ®istry)
00670 {
00671 string throwClause;
00672
00673 if (registry["USE_EXCEPTIONS"])
00674 throwClause = " throw (ScanException)";
00675
00676 LaEpDefWriter x(parserClass, throwClause, tokens, *this, registry);
00677
00678 for (int i = 0; i < prods.size(); i++)
00679 {
00680 ProductionSpec &p = prods[i];
00681 if (p.expansion == NULL) continue;
00682
00683 line() << "bool cppcc::" << parserClass << "::__la_" << p.name
00684 << "_ ()" << throwClause;
00685 line() << "{";
00686 indent();
00687 if (registry["DEBUG_PARSER"])
00688 line() << "cerr << \"Looking ahead for a " << p.name << ".\" << endl;";
00689 writeEbnfLa(*p.expansion, tokens);
00690 line();
00691 line() << "__lookahead_succeed_:";
00692 indent();
00693 if (registry["DEBUG_PARSER"])
00694 line() << "cerr << \"Lookahead for " << p.name << " succeeded.\" << endl;";
00695 line() << "return true;";
00696 line();
00697 unindent();
00698 line() << "__lookahead_fail_:";
00699 indent();
00700 if (registry["DEBUG_PARSER"])
00701 line() << "cerr << \"Lookahead for " << p.name << " failed.\" << endl;";
00702 line() << "return false;";
00703 unindent();
00704 unindent();
00705 line() << "}";
00706 line();
00707 line();
00708 p.expansion->dfTraverse(x);
00709 }
00710 }