00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074 #include "../config.h"
00075
00076 #ifdef HAVE_STAT
00077 #include <sys/types.h>
00078 #include <sys/stat.h>
00079 #include <unistd.h>
00080 #endif
00081
00082 #include <fstream>
00083 #include <string>
00084 #include <vector>
00085 #include <map>
00086 #include <algorithm>
00087 #include <cctype>
00088 #include <sstream>
00089 #include <list>
00090 #include <functional>
00091 using namespace std;
00092
00093
00094 #include "scanner_dfa_writer.hh"
00095 #include "prop_registry.hh"
00096 #include "scanner_spec.hh"
00097 #include "scanner_dfa_spec.hh"
00098 #include "basic_dfa_spec.hh"
00099 #include "token_spec.hh"
00100 #include "dfa_profile.hh"
00101
00102
00103 ScannerDfaWriter::ScannerDfaWriter (TokenSpec &tSpec_,
00104 PropRegistry ®istry_) :
00105 Writer(registry_),
00106 tSpec(tSpec_)
00107 {}
00108
// Generates the C++ source of the scanner class described by `dfa`.
//
// Two files are written through the Writer stream helpers (openStream / line /
// indent / unindent / closeStream): first the header, whose path comes from
// className2hh(dfa.className), then the implementation file, whose path comes
// from className2cc(dfa.className).
//
// Before any output is produced the PROFILING_FILE registry entry is examined
// and DUMP_PROFILE is set accordingly: either existing profile data is read
// into a ScannerDfaProfile, or the generated scanner is instrumented to dump
// a new profile. When USE_EXCEPTIONS is set, the emitted functions carry a
// " throw (ScanException)" clause.
00109 void ScannerDfaWriter::writeScanner (ScannerDfaSpec &dfa)
00110 {
00111 string throwClause;
00112 string includeMacro = className2macro(dfa.className);
00113 if (registry["USE_EXCEPTIONS"]) throwClause = " throw (ScanException)";
00114 // Work out whether existing profile data can be used or whether the
00115 // generated scanner has to dump a fresh profile (DUMP_PROFILE).
00116
00117 registry["DUMP_PROFILE"] = false;
00118 ScannerDfaProfile sdp(dfa);
00119 if ((string) registry["PROFILING_FILE"] != "")
00120 {
00121 #ifdef HAVE_STAT
00122 struct stat st1, st2;
00123 stat(((string) registry["PROFILING_FILE"]).c_str(), &st1); // NOTE(review): stat() results are unchecked; st1/st2 are indeterminate if a file is missing
00124 stat(((string) registry["input_file"]).c_str(), &st2);
00125 if (st1.st_mtime < st2.st_mtime) {
00126 #else
00127 bool useExisting = false;
00128 if (ifstream(((string) registry["PROFILING_FILE"]).c_str())) {
00129 cout << "Profiling data file " << registry["PROFILING_FILE"]
00130 << " found. Use it ? (y/n):";
00131 cin >> useExisting;
00132 }
00133 if (!useExisting) {
00134 #endif
00135 // Taken when the profile file is older than the input file (HAVE_STAT) or
00136 // when it is not being reused (no HAVE_STAT): request a new profile dump.
00137 registry["DUMP_PROFILE"] = true;
00138 if (registry["be_verbose"])
00139 cerr << formatWarning(Position(), string("Profile data file ") +
00140 (string) registry["PROFILING_FILE"] + " is older"
00141 " than the grammar file. Run the generated "
00142 "scanner once on the sample input to re-generate"
00143 " the profile data then re-run cppcc (see the "
00144 "user's guide for more information on profile "
00145 "based optimization).") << endl;
00146 } else {
00147 // Otherwise load the profile; if it cannot be opened or does not match,
00148 // fall back to dumping a new one.
00149 if (registry["be_verbose"])
00150 cerr << formatWarning(string("Will use profile data from ") +
00151 registry["PROFILING_FILE"] + " when generating"
00152 " this scanner.") << endl;
00153 ifstream ifs(((string) registry["PROFILING_FILE"]).c_str());
00154 if (!ifs) {
00155 cerr << formatWarning(string("Could not open profile data file ") +
00156 registry["PROFILING_FILE"] + ". It will be "
00157 "generated on the "
00158 "next run of the generated scanner.") << endl;
00159 registry["DUMP_PROFILE"] = true;
00160 } else if (!sdp.read(ifs))
00161 {
00162 cerr << formatWarning(string("Profile data from file ") +
00163 registry["PROFILING_FILE"] + " does not match "
00164 "this grammar. It will be re-generated on the "
00165 "next run of the generated scanner.") << endl;
00166 registry["DUMP_PROFILE"] = true;
00167 sdp.reset();
00168 }
00169 }
00170 }
00171
00173 // ---- Header file (className2hh): declarations of the generated classes.
00174
00175 openStream(fullPath(className2hh(dfa.className)));
00176
00177 line() << "#ifndef " << includeMacro;
00178 line() << "#define " << includeMacro;
00179 line();
00180 line() << "#include <string>";
00181 line() << "#include <fstream>";
00182 line() << "#include <iostream>";
00183 line() << "#include <exception>";
00184 line() << "#include <deque>";
00185 line() << "#include <stack>";
00186 line() << "using namespace std;";
00187
00188 line();
00189 line() << "#include \"" << className2hh(tSpec.className) << "\"";
00190 line();
00191 if (!dfa.preambleCode.empty())
00192 writeChunk(dfa.preambleCode);
00193 line() << "namespace cppcc";
00194 line() << "{";
00195 line();
00196 // ScanException: exception type carrying a message and a Position.
00197 line() << "class ScanException : public exception";
00198 line() << "{";
00199 indent();
00200 line() << "public:";
00201 indent();
00202 line();
00203 line() << "ScanException (const string &message_ = \"Scan exception\") :";
00204 line() << " message(message_)";
00205 line() << "{}";
00206 line();
00207 line() << "ScanException (const Position &pos_,";
00208 line() << " const string &message_ = \"Scan exception\") :";
00209 line() << " message(message_), pos(pos_)";
00210 line() << "{}";
00211 line();
00212 line() << "~ScanException () throw ()";
00213 line() << "{}";
00214 line();
00215 line() << "virtual operator string () const";
00216 line() << "{";
00217 indent();
00218 line() << "return string(pos) + \": \" + message;";
00219 unindent();
00220 line() << "}";
00221 line();
00222 line() << "virtual const char* what () throw ()";
00223 line() << "{";
00224 indent();
00225 line() << "return message.c_str();";
00226 unindent();
00227 line() << "}";
00228 line();
00229 unindent();
00230 line() << "private:";
00231 line();
00232 indent();
00233 line() << "Position pos;";
00234 line() << "string message;";
00235 unindent();
00236 unindent();
00237 line() << "};";
00238 line();
00239 // <className>_base: default implementations of the handler hooks.
00240 line() << "class " << dfa.className << "_base";
00241 line() << "{";
00242 indent();
00243 line() << "protected:";
00244 indent();
00245 line() << "bool onScanError (ScanException &ex) { return false; }";
00246 line() << "bool onIOError (ScanException &ex) { return false; }";
00247 line() << "bool wrap () { return false; }";
00248 line() << "void commonTokenAction (" << tSpec.className << " &tok) {}";
00249 unindent();
00250 unindent();
00251 line() << "};";
00252 line();
00253 // The scanner class, deriving from <className>_base plus dfa.inheritance.
00254 line() << "class " << dfa.className << ": public "
00255 << dfa.className << "_base";
00256 if (!dfa.inheritance.empty()) {
00257 ofs << ", ";
00258 writeChunk(dfa.inheritance);
00259 }
00260 line() << "{";
00261 line() << "public:";
00262 indent();
00263 line();
00264 // t_laCell: linked-list cell that owns one lookahead token.
00265 line() << "struct t_laCell";
00266 line() << "{";
00267 indent();
00268 line() << tSpec.className << " *token;";
00269 line() << "struct t_laCell *next;";
00270 line() << "t_laCell(" << tSpec.className
00271 << " *token_, struct t_laCell *next_) : token(token_), next(next_) {}";
00272 line() << "~t_laCell() { delete token; }";
00273 unindent();
00274 line() << "};";
00275 line();
00276 // Constructor; much smaller buffer sizes are emitted when DEBUG_SCANNER is set.
00277 line() << dfa.className << " (istream *in_ = NULL)" << throwClause;
00278 line() << " : _buffer(NULL), _reject(false), _more(false),";
00279 if (registry["DEBUG_SCANNER"])
00280 line() << "_bsz(5), _pbsz(5)";
00281 else
00282 line() << "_bsz(8 * 1024), _pbsz(100)";
00283 line() << "{";
00284 indent();
00285 line() << "switchToStream(in_);";
00286 line() << "lookahead = false;";
00287 line() << "la0 = _laBegin = _laEnd = new struct t_laCell(new " << tSpec.className << ", NULL);";
00288 line() << "la0->next = la0;";
00289 line() << "switchToState(START);";
00290 if (registry["DUMP_PROFILE"])
00291 line() << "_freqs_init();";
00292 unindent();
00293 line() << "}";
00294 line();
00295 if (registry["DUMP_PROFILE"]) {
00296 line();
00297 line() << "~" << dfa.className << "() { _freqs_dump(); }";
00298 line();
00299 }
00300 line() << "void rejectToken() {_reject = true; }";
00301 line();
00302 line() << "void switchToStream (istream *in)";
00303 line() << "{";
00304 indent();
00305 line() << "resetPos();";
00306 line() << "inputStream = in;";
00307 line() << "_binit();";
00308 unindent();
00309 line() << "}";
00310 line();
00311 // StreamState: snapshot of the input/buffer state used by pushStream/popStream.
00312 line() << "class StreamState";
00313 line() << "{";
00314 indent();
00315 line() << "friend class " << dfa.className << ";";
00316 line() << "istream *inputStream;";
00317 line() << "char *_buffer, *_ch, *_start, *_eob;";
00318 if (registry["COUNT_COLUMNS"])
00319 line() << "char * _bol;";
00320 line() << "int _bsz, _pbsz;";
00321 line() << "bool _lta, _eof;";
00322 line() << "Position bPos, ePos;";
00323 line();
00324 line() << "StreamState (istream *inputStream_, char *_buffer_, int _bsz_,";
00325 if (registry["COUNT_COLUMNS"])
00326 line() << " int _pbsz_, char *_start_, char *_bol_, char *_ch_,";
00327 else
00328 line() << " int _pbsz_, char *_start_, char *_ch_,";
00329 line() << " char *_eob_, bool _lta_, bool _eof_,";
00330 line() << " Position &bPos_, Position &ePos_) :";
00331 line() << " inputStream(inputStream_), _buffer(_buffer_), _bsz(_bsz_),";
00332 if (registry["COUNT_COLUMNS"])
00333 line() << " _pbsz(_pbsz_), _start(_start_), _bol(_bol_), _ch(_ch_),";
00334 else
00335 line() << " _pbsz(_pbsz_), _start(_start_), _ch(_ch_),";
00336 line() << " _eob(_eob_), _lta(_lta_), _eof(_eof_),";
00337 line() << " bPos(bPos_), ePos(ePos_)";
00338 line() << "{}";
00339 line();
00340 line() << "~StreamState () { if (_buffer != NULL) delete[] _buffer; }";
00341 unindent();
00342 line() << "};";
00343 line();
00344
00345 line() << "StreamState* pushStream (istream *in)";
00346 line() << "{";
00347 indent();
00348 line() << "StreamState *res = new StreamState(inputStream, _buffer, _bsz,";
00349 if (registry["COUNT_COLUMNS"])
00350 line() << " _pbsz, _more ? _start : _ch, _bol, _ch,";
00351 else
00352 line() << " _pbsz, _more ? _start : _ch, _ch,";
00353 line() << " _eob, _lta, _eof,";
00354 line() << " _more ? bPos : ePos, ePos);";
00355 line() << "_buffer = NULL;";
00356 line() << "switchToStream(in);";
00357 line() << "return res;";
00358 unindent();
00359 line() << "}";
00360 line();
00361
00362 line() << "void popStream (StreamState *s)";
00363 line() << "{";
00364 indent();
00365 line() << "if (_buffer != NULL) delete[] _buffer;";
00366 line() << "inputStream = s->inputStream;";
00367 line() << "_buffer = s->_buffer;";
00368 line() << "_bsz = s->_bsz;";
00369 line() << "_pbsz = s->_pbsz;";
00370 line() << "_start = s->_start;";
00371 if (registry["COUNT_COLUMNS"])
00372 line() << "_bol = s->_bol;";
00373 line() << "_ch = s->_ch;";
00374 line() << "_eob = s->_eob;";
00375 line() << "_lta = s->_lta;";
00376 line() << "_eof = s->_eof;";
00377 line() << "bPos = s->bPos;";
00378 line() << "ePos = s->ePos;";
00379 line() << "s->_buffer = NULL;";
00380 line() << "delete s;";
00381 unindent();
00382 line() << "}";
00383 line();
00384
00385
00386 line() << "istream& getInputStream ()";
00387 line() << "{";
00388 indent();
00389 line() << "return *inputStream;";
00390 unindent();
00391 line() << "}";
00392 line();
00393
00394 line() << "int switchToState (int newState)";
00395 line() << "{";
00396 indent();
00397 line() << "int prevState = _state;";
00398 line() << "_state = newState;";
00399 line() << "return prevState;";
00400 unindent();
00401 line() << "}";
00402 line();
00403
00404 line() << "int getState ()";
00405 line() << "{";
00406 indent();
00407 line() << "return _state;";
00408 unindent();
00409 line() << "}";
00410 line();
00411
00412 line() << "int pushState (int newState)";
00413 line() << "{";
00414 indent();
00415 line() << "_stateStack.push(_state);";
00416 line() << "return switchToState(newState);";
00417 unindent();
00418 line() << "}";
00419 line();
00420
00421 line() << "int popState ()";
00422 line() << "{";
00423 indent();
00424 line() << "int prevState = switchToState(_stateStack.top());";
00425 line() << "_stateStack.pop();";
00426 line() << "return prevState;";
00427 unindent();
00428 line() << "}";
00429 line();
00430
00431 line() << "const Position& getCurrentPos ()";
00432 line() << "{";
00433 indent();
00434 line() << "return bPos;";
00435 unindent();
00436 line() << "}";
00437 line();
00438
00439 line() << "void resetPos ()";
00440 line() << "{";
00441 indent();
00442 line() << "ePos.ln = bPos.ln = 1;";
00443 if (registry["COUNT_COLUMNS"]) {
00444 line() << "bPos.col = ePos.col = 1;";
00445 line() << "_bol = _ch;";
00446 }
00447 unindent();
00448 line() << "}";
00449 line();
00450
00451 line() << "int newLine ()";
00452 line() << "{";
00453 indent();
00454 if (registry["COUNT_COLUMNS"]) {
00455 line() << "_bol = _ch;";
00456 line() << "ePos.col = 1;";
00457 }
00458 line() << "return ePos.ln++;";
00459 unindent();
00460 line() << "}";
00461 line();
00462
00463 line() << "bool lookingAhead ()";
00464 line() << "{";
00465 indent();
00466 line() << "return lookahead;";
00467 unindent();
00468 line() << "}";
00469 line();
00470
00471 // Character-level access: getChar and the unGetChar(s) overloads.
00472 line() << "int getChar ()" << throwClause;
00473 line() << "{";
00474 indent();
00475 line() << "for(;;) {";
00476 indent();
00477 line() << "unsigned int c = *_ch++;";
00478 line() << "if (_ch == _eob) {";
00479 indent();
00480 line() << "_ch--;";
00481 line() << "if (_eof && !wrap()) return EOF;";
00482 line() << "else _brefill();";
00483 unindent();
00484 line() << "} else {";
00485 indent();
00486 line() << "if (_start == _ch - 1) _start = _ch;";
00487 line() << "return c;";
00488 unindent();
00489 line() << "}";
00490 unindent();
00491 line() << "}";
00492 unindent();
00493 line() << "}";
00494 line();
00495
00496 line() << "void unGetChars (const char *c, int n);";
00497
00498 line() << "void unGetChar (char c)";
00499 line() << "{";
00500 indent();
00501 line() << "unGetChars(&c, 1);";
00502 unindent();
00503 line() << "}";
00504 line();
00505
00506 line() << "void unGetChars (const string &s)";
00507 line() << "{";
00508 indent();
00509 line() << "unGetChars(s.data(), s.length());";
00510 unindent();
00511 line() << "}";
00512 line();
00513
00514 line() << "void unGetChars (const char *s)";
00515 line() << "{";
00516 indent();
00517 line() << "unGetChars(s, strlen(s));";
00518 unindent();
00519 line() << "}";
00520 line();
00521
00522 line() << tSpec.className << "* la (int k)" << throwClause;
00523 line() << "{";
00524 indent();
00525 line() << "struct t_laCell *p, *q;";
00526 line() << "for (p = la0; k >= 0; k--, q = p, p = p->next) {";
00527 indent();
00528 line() << "if (p == _laEnd) {";
00529 indent();
00530 line() << "if (_laEnd->next == _laBegin)";
00531 indent();
00532 line() << "_laEnd->next = new struct t_laCell(new " << tSpec.className
00533 << "(), _laBegin);";
00534 unindent();
00535 line() << "_scan(_laEnd->token);";
00536 line() << "_laEnd = _laEnd->next;";
00537 unindent();
00538 line() << "}";
00539 unindent();
00540 line() << "}";
00541 line() << "return q->token;";
00542 unindent();
00543 line() << "}";
00544 line();
00545
00546 line() << tSpec.className << "* la ()" << throwClause;
00547 line() << "{";
00548 indent();
00549 line() << "if (la0 == _laEnd) {";
00550 indent();
00551 line() << "if (_laEnd->next == _laBegin)";
00552 indent();
00553 line() << "_laEnd->next = new struct t_laCell(new " << tSpec.className
00554 << "(), _laBegin);";
00555 unindent();
00556 line() << "struct t_laCell *x = _laEnd;";
00557 line() << "_laEnd = _laEnd->next;";
00558 line() << "_scan(x->token);";
00559 unindent();
00560 line() << "}";
00561 line() << "return la0->token;";
00562 unindent();
00563 line() << "}";
00564 line();
00565
00566
00567 line() << "void consume ()" << throwClause;
00568 line() << "{";
00569 indent();
00570 line() << "la();";
00571 line() << "la0 = la0->next;";
00572 line() << "if (!lookahead)";
00573 indent();
00574 line() << "_laBegin = _laBegin->next;";
00575 unindent();
00576 unindent();
00577 line() << "}";
00578 line();
00579
00580
00581
00582
00583 line() << "void unchecked_consume ()";
00584 line() << "{";
00585 indent();
00586 line() << "la0 = la0->next;";
00587 line() << "if (!lookahead)";
00588 indent();
00589 line() << "_laBegin = _laBegin->next;";
00590 unindent();
00591 unindent();
00592 line() << "}";
00593 line();
00594
00595 line() << "void setMarker ()";
00596 line() << "{";
00597 indent();
00598 line() << "lookahead = true;";
00599 line() << "laMarkers.push(la0);";
00600 unindent();
00601 line() << "}";
00602 line();
00603
00604 line() << "void rewindToMarker ()";
00605 line() << "{";
00606 indent();
00607 line() << "la0 = laMarkers.top();";
00608 line() << "laMarkers.pop();";
00609 line() << "if (laMarkers.empty()) {";
00610 indent();
00611 line() << "lookahead = false;";
00612 line() << "_laBegin = la0;";
00613 unindent();
00614 line() << "}";
00615 unindent();
00616 line() << "}";
00617 line();
00618
00619 // One static int constant is declared per lexical state; the values are
00620 // emitted into the implementation file further below.
00621 line();
00622 for (int i = 0; i < dfa.states.size(); i++)
00623 line() << "static const int " << dfa.states[i]->name << ";";
00624 line();
00625
00626 unindent();
00627
00628 line() << "protected: // the ugly stuff";
00629 line();
00630 indent();
00631 line() << "void _scan(" << tSpec.className
00632 << " *token)" << throwClause <<";";
00633
00634 line() << "Position bPos, ePos;";
00635 line() << "istream *inputStream;";
00636 line();
00637
00638 line() << "bool lookahead;";
00639 line() << "struct t_laCell *la0, *_laBegin, *_laEnd;";
00640 line() << "stack<struct t_laCell*> laMarkers;";
00641 line() << "int _state;";
00642 line() << "stack<int> _stateStack;";
00643 line() << "bool _reject;";
00644 line();
00645
00646 line() << "void _brefill ()" << throwClause << ";";
00647 line() << "void _binit()" << throwClause << ";";
00648 line() << "int _bsz; //buffer size";
00649 line() << "int _pbsz; //extra bytes for pushback";
00650
00651
00652
00653 line() << "char *_buffer;";
00654
00655
00656
00657
00658 line() << "char *_ch; // current char";
00659 line() << "char *_eob; // one past the last char in the buffer";
00660 line() << "char *_start; // points ot the token's start";
00661 line() << "bool _eof; // eof seen during _brefill";
00662 line() << "bool _lta; // last token in the stream was accepted";
00663 line() << "bool _more; // true in token actions of MORE tokens";
00664 if (registry["COUNT_COLUMNS"])
00665 line() << "char * _bol; // _ch - _bol = col increment.";
00666 line();
00667 unindent();
00668 if (registry["DUMP_PROFILE"])
00669 {
00670 line() << "private: // DFA profiling stuff";
00671 line();
00672 indent();
00673 line() << "unsigned int **_freqs[" << sdp.size() << "];";
00674 line() << "unsigned int *_freqs_sz[" << sdp.size() << "];";
00675 line();
00676 line() << "void _freqs_init ();";
00677 line();
00678 line() << "void _freqs_dump ();";
00679 line();
00680 line() << "void _bump_counter (int lState, int state, int transition);";
00681 unindent();
00682 line();
00683 }
00684 if (!dfa.userCode.empty()) {
00685 line() << "private: // user code";
00686 line();
00687 for (int i = 0; i < dfa.userCode.size(); i++)
00688 writeChunk(dfa.userCode[i]);
00689 line();
00690 }
00691 line() << "}; // end class " << dfa.className;
00692 line();
00693 line();
00694 line() << "} //end namespace cppcc" << endl;
00695 line() << "#endif // ifndef " << includeMacro;
00696
00697 closeStream();
00700 // ---- Implementation file (className2cc): out-of-line member definitions.
00701
00702 openStream(fullPath(className2cc(dfa.className)));
00703 line();
00704 line() << "#include \"" << className2hh(dfa.className) << "\"";
00705 line();
00706 for (int i = 0; i < dfa.states.size(); i++)
00707 line() << "const int cppcc::" << dfa.className << "::" <<
00708 dfa.states[i]->name << " = " << i << ";";
00709 line();
00710 // _brefill and _binit: input buffer management.
00711 line() << "void cppcc::" << dfa.className << "::_brefill ()" << throwClause;
00712 line() << "{";
00713 indent();
00714 if (registry["DEBUG_SCANNER"])
00715 line() << "cerr << \"refilling buffer.\" << endl;";
00716 line() << "if (!*inputStream) {";
00717 indent();
00718 writeExceptionCode("Input error.", "onIOError", false);
00719 unindent();
00720 line() << "}";
00721 line() << "for (struct t_laCell *p = _laBegin; p != _laEnd; p = p->next)";
00722 indent();
00723 line() << "p->token->_cacheImg();";
00724 unindent();
00725 line() << "if (_start != _ch) {";
00726 indent();
00727 line() << "if (_start <= _buffer + _pbsz) {";
00728 indent();
00729 line() << "int nbsz = _pbsz;";
00730 line() << "do { nbsz *= 2; } while (nbsz <= _bsz + _pbsz);";
00731 if (registry["DEBUG_SCANNER"])
00732 line() << "cerr << \"growing buffer from \" << _bsz << \" to \" << nbsz"
00733 "<< endl;";
00734 line() << "_bsz = nbsz;";
00735 line() << "char *newBuf = new char[_bsz + _pbsz + 1];";
00736 line() << "memcpy(newBuf + _pbsz, _start, _ch - _start);";
00737 if (registry["COUNT_COLUMNS"])
00738 line() << "_bol += newBuf - _buffer;";
00739 line() << "_ch += newBuf - _buffer;";
00740 line() << "_start = newBuf + _pbsz;";
00741 line() << "delete[] _buffer;";
00742 line() << "_buffer = newBuf;";
00743 unindent();
00744 line() << "} else {";
00745 indent();
00746 line() << "memmove(_buffer + _pbsz, _start, _ch - _start);";
00747 if (registry["COUNT_COLUMNS"]) line() << "_bol -= _start - (_buffer + _pbsz);";
00748 line() << "_ch = _buffer + _pbsz + (_ch - _start);";
00749 line() << "_start = _buffer + _pbsz;";
00750 unindent();
00751 line() << "}";
00752 unindent();
00753 line() << "} else {";
00754 indent();
00755 line() << "_start = _ch = _buffer + _pbsz;";
00756 if (registry["COUNT_COLUMNS"])
00757 line() << "_bol = _start;";
00758 unindent();
00759 line() << "}";
00760 line() << "inputStream->read(_ch, _bsz - (_ch - (_buffer + _pbsz)));";
00761 line() << "_eob = _ch + inputStream->gcount();";
00762 line() << "*_eob++ = \'\\0\';";
00763 line() << "_eof = inputStream->eof();";
00764 line() << "_lta = false;";
00765 unindent();
00766 line() << "}";
00767 line();
00768 line() << "void cppcc::" << dfa.className << "::_binit ()" << throwClause;
00769 line() << "{";
00770 indent();
00771 line() << "if (_buffer == NULL) _buffer = new char[_bsz + _pbsz + 1];";
00772 line() << "_start = _ch = _eob = _buffer + _pbsz;";
00773 line() << "*_eob++ = \'\\0\';";
00774 line() << "_eof = inputStream == NULL;";
00775 unindent();
00776 line() << "}";
00777 // unGetChars: copies n characters in front of _start, growing the buffer
00778 // when there is not enough room before _start.
00779 line() << "void cppcc::" << dfa.className
00780 << "::unGetChars (const char *c, int n)";
00781 line() << "{";
00782
00783
00784
00785
00786
00787 indent();
00788 line () << "if (_start < _buffer + n) { //grow the buffer";
00789 indent();
00790 line() << "int npbsz = _pbsz, delta = 0;";
00791 line() << "for (struct t_laCell *p = _laBegin; p != _laEnd; p = p->next)";
00792 indent();
00793 line() << "p->token->_cacheImg();";
00794 unindent();
00795 line() << "do { npbsz *= 2; delta = npbsz - _pbsz; } while (delta < n);";
00796 if (registry["DEBUG_SCANNER"])
00797 line() << "cerr << \"growing pushback area from \" << _pbsz"
00798 " << \" to \" << npbsz << endl;";
00799 line() << "char *newBuf = new char[_bsz + npbsz + 1];";
00800 line() << "memcpy(newBuf + delta, _start, _eob - _start + 1);";
00801 if (registry["COUNT_COLUMNS"])
00802 line() << "_bol += newBuf + delta - _start;";
00803 line() << "_ch += newBuf + delta - _start;";
00804 line() << "_eob += newBuf + delta - _start;";
00805 line() << "_start = newBuf + delta;";
00806 line() << "_pbsz = npbsz;";
00807 line() << "delete[] _buffer;";
00808 line() << "_buffer = newBuf;";
00809 unindent();
00810 line() << "}";
00811 line() << "memmove(_start - n, _start, n);";
00812 line() << "_ch -= n;";
00813 line() << "_start -= n;";
00814 if (registry["COUNT_COLUMNS"])
00815 line() << "_bol -= n;";
00816 line() << "memcpy(_ch, c, n);";
00817 unindent();
00818 line() << "}";
00819 line();
00820 // Profiling support, emitted only when DUMP_PROFILE is set.
00821 if (registry["DUMP_PROFILE"])
00822 {
00823 line();
00824 line() << "void cppcc::" << dfa.className << "::_freqs_init ()";
00825 line() << "{";
00826 indent();
00827 for (int i = 0; i < sdp.size() ; i++) {
00828 line() << "_freqs[" << i << "] = new (unsigned int *)[" << sdp[i].size()
00829 << "];";
00830 line() << "_freqs_sz[" << i << "] = new unsigned int [" << sdp[i].size()
00831 << "];";
00832 for (int j = 0; j < sdp[i].size(); j++) {
00833 line() << "_freqs[" << i << "][" << j << "] = new unsigned int["
00834 << sdp[i][j].size() << "];";
00835 line() << "_freqs_sz[" << i << "][" << j << "] = " << sdp[i][j].size()
00836 << ";";
00837 line() << "for (int i = 0; i < " << sdp[i][j].size() << "; i++)";
00838 line() << " _freqs[" << i << "][" << j << "][i] = 0;";
00839 }
00840
00841 }
00842 unindent();
00843 line() << "}";
00844 line();
00845 line() << "void cppcc::" << dfa.className << "::_freqs_dump ()";
00846 line() << "{";
00847 indent();
00848 line() << "ofstream ofs(\"" << registry["PROFILING_FILE"] << "\");";
00849 for (int i = 0; i < sdp.size(); i++) {
00850 for (int j = 0; j < sdp[i].size(); j++) {
00851 line() << "for (unsigned int i = 0; i < _freqs_sz["
00852 << i << "][" << j << "]; i++)";
00853 line() << " ofs << _freqs[" << i << "][" << j << "][i] << \" \";";
00854 line() << "ofs << endl;";
00855 }
00856 line() << "ofs << endl; ";
00857 }
00858 unindent();
00859 line() << "}";
00860 line();
00861 line() << "void cppcc::" << dfa.className
00862 << "::_bump_counter (int lState, int state, int transition)";
00863 line() << "{";
00864 indent();
00865 line() << "if (_freqs[lState][state][transition] == UINT_MAX) {";
00866 line() << "for (unsigned int i = 0; i < _freqs_sz[lState][state]; i++)";
00867 line() << " _freqs[lState][state][i] >>= 1;";
00868 line() << "}";
00869 line() << "_freqs[lState][state][transition]++;";
00870 unindent();
00871 line() << "}";
00872 line();
00873 }
00874 // _scan: an endless loop that switches on the lexical state.
00875 line() << "void cppcc::" << dfa.className << "::_scan (" << tSpec.className
00876 << " *token)" << throwClause;
00877 line() << "{";
00878 indent();
00879 line() << "register unsigned char c;";
00880 line();
00881
00882 line() << "for(;;)";
00883 line() << "{";
00884 indent();
00885 // One case per lexical state, each holding the code produced by writeDfa
00886 // for that state's DFA and profile.
00887
00888 line() << "switch (getState())";
00889 line() << "{";
00890 for (int i = 0; i < dfa.states.size(); i++)
00891 {
00892 line() << "case " << dfa.states[i]->name << ":";
00893 indent();
00894 writeDfa(*dfa.states[i], sdp[i]);
00895 unindent();
00896 }
00897 // Any other state value is reported through onScanError.
00898
00899 line() << "default:";
00900 indent();
00901 line() << "{";
00902 indent();
00903 writeExceptionCode("Illegal lexical state", "onScanError");
00904 unindent();
00905 line() << "}";
00906 unindent();
00907
00908 line() << "}";
00909 // Accept labels: one per non-special token; the emitted code depends on
00910 // the token kind (skip, keyword, regular, more).
00911 line();
00912 for (int t = 0; t < tSpec.count(); t++)
00913 {
00914 if (tSpec[t].kind() == ITokenSpec::special) continue;
00915
00916 line() << "__accept_token_" << tSpec[t].name() << "_:";
00917 indent();
00918
00919 if (registry["DEBUG_SCANNER"]) {
00920 line() << "{";
00921 indent();
00922 line() << "char x = *_ch;";
00923 line() << "*_ch = \'\\0\';";
00924 line() << "cerr << \"Accepted a " << tSpec[t].name()
00925 << "(\" << " << t << " << \") token at position \" << bPos.ln"
00926 << (registry["COUNT_COLUMNS"] ? " << \",\" << bPos.col" : "")
00927 << " << \" up to \" << ePos.ln"
00928 << (registry["COUNT_COLUMNS"] ? " << \",\" << ePos.col + (_ch - _bol)" : "")
00929 << " << \" image: \\\"\" << _start << \"\\\".\" << endl;";
00930 line() << "*_ch = x;";
00931 unindent();
00932 line() << "}";
00933 }
00934
00935 switch (tSpec[t].kind())
00936 {
00937 case ITokenSpec::skip:
00938 if (registry["COUNT_COLUMNS"]) {
00939 line() << "ePos.col += _ch - _bol;";
00940 line() << "_bol = _ch;";
00941 }
00942 if (!tSpec[t].tokAction().empty()) {
00943 line() << "token->_set(" << tSpec.className << "::" << tSpec[t].name()
00944 << ", bPos, ePos, _start, _ch - _start);";
00945 line() << "{";
00946 writeChunk(tSpec[t].tokAction());
00947 line() << "}";
00948 }
00949 line() << "_start = _ch;";
00950 line() << "bPos = ePos;";
00951 line() << "continue;";
00952 break;
00953 case ITokenSpec::keyword:
00954 line() << "{";
00955 indent();
00956 line() << "token->id = " << tSpec.className << "::" << tSpec[t].name()
00957 << ";";
00958 if (registry["COUNT_COLUMNS"]) {
00959 line() << "ePos.col += _ch - _bol;";
00960 line() << "_bol = _ch;";
00961 }
00962 line() << "token->bPos = bPos;";
00963 line() << "token->ePos = ePos;";
00964 if (!tSpec[t].tokAction().empty()) {
00965 line() << "token->_set(_start, _ch - _start);";
00966 line() << "{";
00967 writeChunk(tSpec[t].tokAction());
00968 line() << "}";
00969 }
00970 line() << "commonTokenAction (*token);";
00971 line() << "bPos = ePos;";
00972 line() << "_start = _ch;";
00973 line() << "if (_reject) { _reject = false; continue; }";
00974 line() << "else return;";
00975 unindent();
00976 line() << "}";
00977 break;
00978 case ITokenSpec::regular:
00979 line() << "{";
00980 indent();
00981 if (registry["COUNT_COLUMNS"]) {
00982 line() << "ePos.col += _ch - _bol;";
00983 line() << "_bol = _ch;";
00984 }
00985 line() << "token->_set(" << tSpec.className << "::" << tSpec[t].name()
00986 << ", bPos, ePos, _start, _ch - _start);";
00987 if (!tSpec[t].tokAction().empty()) {
00988 line() << "{";
00989 writeChunk(tSpec[t].tokAction());
00990 line() << "}";
00991 }
00992 line() << "commonTokenAction (*token);";
00993 line() << "bPos = ePos;";
00994 line() << "_start = _ch;";
00995 line() << "if (_reject) { _reject = false; continue; }";
00996 line() << "else return;";
00997 unindent();
00998 line() << "}";
00999 break;
01000 case ITokenSpec::more:
01001 if (!tSpec[t].tokAction().empty()) {
01002 if (registry["COUNT_COLUMNS"]) {
01003 line() << "ePos.col += _ch - _bol;";
01004 line() << "_bol = _ch;";
01005 }
01006 line() << "token->_set(" << tSpec.className << "::" << tSpec[t].name()
01007 << ", bPos, ePos, _start, _ch - _start);";
01008 line() << "_more = true;";
01009 line() << "{";
01010 writeChunk(tSpec[t].tokAction());
01011 line() << "}";
01012 line() << "_more = false;";
01013 }
01014 line() << "continue;";
01015 }
01016 unindent();
01017 }
01018 line() << "__scan_error_:";
01019 indent();
01020 line() << "{";
01021 indent();
01022 #ifdef DEBUG
01023 line() << "cerr << \" unexpected: 0x\" << hex << (int) c << endl;";
01024 #endif
01025 line() << "string msg = string(\"Unexpected character \\\'\") + (char) c + \"\\\'.\";";
01026 writeExceptionCode("msg", "onScanError", true);
01027 unindent();
01028 line() << "}";
01029 unindent();
01030
01031 line() << "__unexpected_eof_:";
01032 indent();
01033 line() << "{";
01034 indent();
01035 line() << "string msg(\"Unexpected EOF.\");";
01036 writeExceptionCode("msg", "onScanError", true);
01037 unindent();
01038 line() << "}";
01039
01040 line() << "__eof_seen_:";
01041 indent();
01042 line() << "token->id = " << tSpec.className << "::eof;";
01043 line() << "token->bPos = bPos;";
01044 line() << "token->ePos = ePos;";
01045 line() << "return;";
01046 unindent();
01047
01048 unindent();
01049 unindent();
01050 line() << "}";
01051 unindent();
01052 line() << "}";
01053 line();
01054 closeStream();
01055 }
01056
// Maps a destination DFA state to the input symbols that lead to it.
typedef std::map<int, std::vector<unsigned char> > GoToMap;

// One GoToMap entry (second) paired with its weight (first).
typedef std::pair<unsigned long int, GoToMap::iterator> Wtrans;

// Strict "greater weight first" ordering over Wtrans values, used to sort
// transitions so that the heaviest ones come first.
//
// The functor no longer derives from std::binary_function, which was
// deprecated in C++11 and removed in C++17; the nested typedefs that base
// class supplied are declared directly so existing uses keep working. The call
// operator is const so the comparator can be used through const objects.
struct t_Wgtr
{
  typedef Wtrans first_argument_type;
  typedef Wtrans second_argument_type;
  typedef bool result_type;

  // Returns true when `a` weighs strictly more than `b`.
  bool operator () (const Wtrans &a, const Wtrans &b) const
  {
    return a.first > b.first;
  }
};
01066
01067 void ScannerDfaWriter::writeDfa (BasicDfaSpec &dfa, BasicDfaProfile &sdp)
01068 {
01069 for (int s = 0; s < dfa.states.size(); s++)
01070 {
01071 line() << "__" << dfa.name << "_" << s << "_:";
01072 indent();
01073
01074
01075 GoToMap goTo;
01076 for (int t = 0; t < dfa.states[s].transitions.size(); t++)
01077 {
01078 goTo[dfa.states[s].transitions[t].to].
01079 push_back(dfa.states[s].transitions[t].on);
01080 }
01081
01082
01083
01084
01085
01086
01087 vector<Wtrans> weightedTrans;
01088
01089 {
01090 int k = 0;
01091 for (GoToMap::iterator i = goTo.begin(); i != goTo.end(); i++, k++)
01092 weightedTrans.push_back(Wtrans(sdp[s][k], i));
01093
01094 stable_sort<vector<Wtrans>::iterator, t_Wgtr>(weightedTrans.begin(),
01095 weightedTrans.end(),
01096 struct t_Wgtr());
01097 }
01098
01099
01100 int transNo = 0;
01101 for (vector<Wtrans>::iterator i = weightedTrans.begin();
01102 i != weightedTrans.end(); i++, transNo++)
01103 {
01104 if (i != weightedTrans.begin()) line() << "else ";
01105 else {
01106 line() << "c = *_ch++;";
01107 line();
01108 }
01109 Wtrans &wtr = *i;
01110 unsigned long int weight = wtr.first;
01111 ofs << " /*" << weight << "*/ if (";
01112 int nextState = (*wtr.second).first;
01113 vector<unsigned char> &on = (*wtr.second).second;
01114 sort(on.begin(), on.end());
01115 int j = 0;
01116 j = writeCharCompare(on, j);
01117 while (j < on.size())
01118 {
01119 ofs << " || ";
01120 j = writeCharCompare(on, j);
01121 }
01122 ofs << ")";
01123 indent();
01124 line();
01125 if (registry["DUMP_PROFILE"])
01126 ofs << "{ _bump_counter(" << dfa.name << ", " << s << ", "
01127 << transNo << "); ";
01128 ofs << "goto __" << dfa.name << "_" << nextState << "_;";
01129 if (registry["DUMP_PROFILE"])
01130 ofs << " }";
01131 unindent();
01132 }
01133 if (!goTo.empty())
01134 {
01135 line() << "if ((_ch-- == _eob) && (c == \'\\0\')) {";
01136 indent();
01137 line() << "if (_eof) {";
01138 indent();
01139 if (!registry["TOKENS_SPAN_EOF"]) {
01140 line() << "if ((_ch != _start) && !_lta)";
01141 if (dfa.states[s].isFinal)
01142 ofs << "{ _lta = true; goto __accept_token_"
01143 << tSpec[dfa.states[s].tokId].name() << "_; }";
01144 else ofs << "goto __unexpected_eof_;";
01145 }
01146 line() << "if (!wrap()) goto __eof_seen_;";
01147 unindent();
01148 line() << "}";
01149 line() << "if (_ch + 1 == _eob) _brefill();";
01150 line() << "goto __" << dfa.name << "_" << s << "_;";
01151 unindent();
01152 line() << "}";
01153 }
01154
01155 if (dfa.states[s].isFinal) {
01156
01157 line() << "goto __accept_token_"
01158 << tSpec[dfa.states[s].tokId].name() << "_;";
01159 } else {
01160
01161 line() << "goto __scan_error_;";
01162 }
01163 unindent();
01164 }
01165 }
01166
// Renders the character code `c` as the text of a C++ character literal
// followed by a comment holding its decimal value, e.g.
//   'a' /*97*/     '\n' /*10*/     '\0' /*0*/
//
// Quote and backslash are backslash-escaped, the common control characters use
// their symbolic escapes, a blank and every other graphic character are written
// as themselves, and anything else becomes an octal escape.
//
// The original had a second switch for non-graphic characters that also listed
// '\\', '"' and '\'': those are graphic characters and could never reach it.
// Both switches are merged here; the produced text is unchanged.
//
// `c` is expected to be in the unsigned char range (it is handed to isgraph).
std::string formatChar (unsigned int c)
{
  std::ostringstream oss;
  oss << '\'';
  switch (c)
  {
  case '\'': oss << "\\'";  break;
  case '\\': oss << "\\\\"; break;
  case ' ':  oss << ' ';    break;
  case '\n': oss << "\\n";  break;
  case '\t': oss << "\\t";  break;
  case '\v': oss << "\\v";  break;
  case '\b': oss << "\\b";  break;
  case '\r': oss << "\\r";  break;
  case '\f': oss << "\\f";  break;
  default:
    if (std::isgraph(static_cast<int>(c)))
      oss << static_cast<unsigned char>(c);
    else
      oss << '\\' << std::oct << c;   // octal escape, e.g. \0 or \177
  }
  // std::dec undoes the sticky std::oct before the decimal value is printed.
  oss << "' /*" << std::dec << c << "*/";
  return oss.str();
}
01197
01198 int ScannerDfaWriter::writeCharCompare (vector<unsigned char> v, int i)
01199 {
01200 int j = i + 1;
01201 unsigned char first = v[i];
01202 unsigned char last = v[i];
01203 while ((j < v.size()) && (v[j] == last + 1)) last = v[j++];
01204
01205 if (first == last) {
01206 if (first == 0)
01207 ofs << "((c == \'\\0\') && (_ch != _eob))";
01208 else
01209 ofs << "(c == " << formatChar(first) << ")";
01210
01211 } else {
01212 if (first == 0) {
01213 ofs << "((c == \'\\0\') && (_ch != _eob)) || ";
01214 first++;
01215 }
01216 if (last == 255)
01217 ofs << "("<< formatChar(first) << " <= c)";
01218 else
01219 ofs << "((" << formatChar(first) << " <= c) && (c <= "
01220 << formatChar(last) << "))";
01221 }
01222
01223 return j;
01224 }
01225
01226
01227 void ScannerDfaWriter::writeExceptionCode (const string &what,
01228 const string &handlerName,
01229 bool isName)
01230 {
01231 if (isName) {
01232 line() << "ScanException __ex(ePos, " << what << ");";
01233 } else {
01234 line() << "ScanException __ex(ePos, \"" << what << "\");";
01235 }
01236 if (registry["DEBUG_SCANNER"])
01237 line() << "cerr << \"Scanner exception: \" << __ex.what() << endl;";
01238 line() << "if (!" << handlerName << "(__ex))";
01239 indent();
01240 if (registry["USE_EXCEPTIONS"]) line() << "throw __ex;";
01241 else line() << "abort();";
01242 unindent();
01243 }