Main Page   Modules   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

scanner_dfa_writer.cc

Go to the documentation of this file.
00001 /*
00002  *  File:       scanner_dfa_writer.cc
00003  *              $Id: scanner_dfa_writer.cc,v 1.13 2002/07/09 08:54:51 alec Exp $
00004  *
00005  *  Author:     Alec Panoviciu (alecu@email.com)
00006  * 
00007  *  Comments:
00008  *
00009  *  Revision history:
00010  *
00011  *  $Log: scanner_dfa_writer.cc,v $
00012  *  Revision 1.13  2002/07/09 08:54:51  alec
00013  *  0.0.3 stuff
00014  *
00015  *  Revision 1.12  2002/07/09 03:04:58  alec
00016  *  OWN_STRINGS bu*beep*it finally vanished
00017  *  gcc 3.1&mingw - related cleanups
00018  *
00019  *  Revision 1.11  2002/06/26 20:50:22  alec
00020  *  g++ 3.x happy
00021  *
00022  *  Revision 1.10  2002/06/23 23:29:55  alec
00023  *  profile-based optimization stuff
00024  *
00025  *  Revision 1.9  2002/06/13 11:41:46  alec
00026  *  added #line stuff
00027  *
00028  *  Revision 1.8  2002/05/31 12:11:09  alec
00029  *  *** empty log message ***
00030  *
00031  *  Revision 1.7  2002/05/27 03:03:36  alec
00032  *  doc update
00033  *
00034  *  Revision 1.6  2002/05/22 01:42:24  alec
00035  *  stream(buffer) switching, getChar, unGetChar implemented
00036  *  some bug fixes
00037  *
00038  *  Revision 1.5  2002/05/16 21:46:06  alec
00039  *  8x generated code speed improvement (almost 2x flex) weeepeeee!!!
00040  *
00041  *  Revision 1.4  2002/05/08 10:36:18  alec
00042  *  added keyword tokens support
00043  *
00044  *  Revision 1.3  2002/05/07 10:02:18  alec
00045  *  fixed some bugs & mem leaks; added MORE tokens support
00046  *
00047  *  Revision 1.2  2002/05/04 17:39:22  alec
00048  *  the scanner works (slightly tested)
00049  *
00050  *  Revision 1.1  2002/05/01 16:34:01  alec
00051  *  *** empty log message ***
00052  *
00053  */
00054 
00055 /*
00056   Copyright (C) 2002 Alexandru Panoviciu (alecu@email.com)
00057 
00058   This program is free software; you can redistribute it and/or modify
00059   it under the terms of the GNU General Public License as published by
00060   the Free Software Foundation; either version 2 of the License, or
00061   (at your option) any later version.
00062 
00063   This program is distributed in the hope that it will be useful,
00064   but WITHOUT ANY WARRANTY; without even the implied warranty of
00065   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00066   GNU General Public License for more details.
00067 
00068   You should have received a copy of the GNU General Public License
00069   along with this program; if not, write to the Free Software
00070   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00071 
00072  */
00073 
00074 #include "../config.h"
00075 
00076 #ifdef HAVE_STAT
00077 #include <sys/types.h>
00078 #include <sys/stat.h>
00079 #include <unistd.h>
00080 #endif
00081 
00082 #include <fstream>
00083 #include <string>
00084 #include <vector>
00085 #include <map>
00086 #include <algorithm>
00087 #include <cctype>
00088 #include <sstream>
00089 #include <list>
00090 #include <functional>
00091 using namespace std;
00092 
00093 
00094 #include "scanner_dfa_writer.hh"
00095 #include "prop_registry.hh"
00096 #include "scanner_spec.hh"
00097 #include "scanner_dfa_spec.hh"
00098 #include "basic_dfa_spec.hh"
00099 #include "token_spec.hh"
00100 #include "dfa_profile.hh"
00101 
00102 
00103 ScannerDfaWriter::ScannerDfaWriter (TokenSpec &tSpec_,        // token specification, stored in tSpec
00104                                     PropRegistry &registry_) : // property registry, passed on to Writer
00105   Writer(registry_),  // presumably the base class -- confirm in scanner_dfa_writer.hh
00106   tSpec(tSpec_)       // used later by writeScanner (e.g. tSpec.className, tSpec.count())
00107 {}                    // no work beyond the initializer list
00108 
00109 void ScannerDfaWriter::writeScanner (ScannerDfaSpec &dfa)
00110 {
00111   string throwClause;
00112   string includeMacro = className2macro(dfa.className);
00113   if (registry["USE_EXCEPTIONS"]) throwClause = " throw (ScanException)";
00114 
00115   // check whether a profiling file was given and see whether we should use it
00116   // now or cause it to be generated.
00117   registry["DUMP_PROFILE"] = false;
00118   ScannerDfaProfile sdp(dfa);
00119   if ((string) registry["PROFILING_FILE"] != "")
00120   {
00121 #ifdef HAVE_STAT
00122     struct stat st1, st2;
00123     stat(((string) registry["PROFILING_FILE"]).c_str(), &st1);
00124     stat(((string) registry["input_file"]).c_str(), &st2);
00125     if (st1.st_mtime < st2.st_mtime) {
00126 #else
00127     bool useExisting = false;
00128     if (ifstream(((string) registry["PROFILING_FILE"]).c_str())) {
00129       cout << "Profiling data file " << registry["PROFILING_FILE"]
00130            << " found. Use it ? (y/n):";
00131       cin >> useExisting;
00132     }
00133     if (!useExisting) {
00134 #endif
00135       // the profile data appears to be older than the grammar file. we assume
00136       // we should generate it again
00137       registry["DUMP_PROFILE"] = true;
00138       if (registry["be_verbose"])
00139         cerr << formatWarning(Position(), string("Profile data file ") +
00140                               (string) registry["PROFILING_FILE"] + " is older"
00141                               " than the grammar file. Run the generated "
00142                               "scanner once on the sample input to re-generate"
00143                               " the profile data then re-run cppcc (see the "
00144                               "user's guide for more information on profile "
00145                               "based optimization).") << endl;
00146     } else {
00147       // the profile data seems to match our grammar, we try and use it when
00148       // generating the scanner
00149       if (registry["be_verbose"])
00150         cerr << formatWarning(string("Will use profile data from ") +
00151                               registry["PROFILING_FILE"] + " when generating"
00152                               " this scanner.") << endl;
00153       ifstream ifs(((string) registry["PROFILING_FILE"]).c_str());
00154       if (!ifs) {
00155         cerr << formatWarning(string("Could not open profile data file ") +
00156                               registry["PROFILING_FILE"] + ". It will be "
00157                               "generated on the "
00158                               "next run of the generated scanner.") << endl;
00159         registry["DUMP_PROFILE"] = true;
00160       } else if (!sdp.read(ifs))
00161       {
00162         cerr << formatWarning(string("Profile data from file ") +
00163                               registry["PROFILING_FILE"] + " does not match "
00164                               "this grammar. It will be re-generated on the "
00165                               "next run of the generated scanner.") << endl;
00166         registry["DUMP_PROFILE"] = true;
00167         sdp.reset();
00168       }
00169     }
00170   }
00171 
00173   // scanner's .hh
00174 
00175   openStream(fullPath(className2hh(dfa.className)));
00176 
00177   line() << "#ifndef " << includeMacro;
00178   line() << "#define " << includeMacro;
00179   line();
00180   line() << "#include <string>";
00181   line() << "#include <fstream>";
00182   line() << "#include <iostream>";
00183   line() << "#include <exception>";
00184   line() << "#include <deque>";
00185   line() << "#include <stack>";
00186   line() << "using namespace std;";
00187   //  line() << "#include <ios>";
00188   line();
00189   line() << "#include \"" << className2hh(tSpec.className) << "\"";
00190   line();
00191   if (!dfa.preambleCode.empty())
00192     writeChunk(dfa.preambleCode);
00193   line() << "namespace cppcc";
00194   line() << "{";
00195   line();
00196   
00197   line() << "class ScanException : public exception";
00198   line() << "{";
00199   indent();
00200   line() << "public:";
00201   indent();
00202   line();
00203   line() << "ScanException (const string &message_ = \"Scan exception\") :";
00204   line() << "  message(message_)";
00205   line() << "{}";
00206   line();
00207   line() << "ScanException (const Position &pos_,";
00208   line() << "               const string &message_ = \"Scan exception\") :";
00209   line() << "  message(message_), pos(pos_)";
00210   line() << "{}";
00211   line();
00212   line() << "~ScanException () throw ()";
00213   line() << "{}";
00214   line();
00215   line() << "virtual operator string () const";
00216   line() << "{";
00217   indent();
00218   line() << "return string(pos) + \": \" + message;";
00219   unindent();
00220   line() << "}";
00221   line();
00222   line() << "virtual const char* what () throw ()";
00223   line() << "{";
00224   indent();
00225   line() << "return message.c_str();";
00226   unindent();
00227   line() << "}";
00228   line();
00229   unindent();
00230   line() << "private:";
00231   line();
00232   indent();
00233   line() << "Position pos;";
00234   line() << "string message;";
00235   unindent();
00236   unindent();
00237   line() << "};";
00238   line();
00239 
00240   line() << "class " << dfa.className << "_base";
00241   line() << "{";
00242   indent();
00243   line() << "protected:";
00244   indent();
00245   line() << "bool onScanError (ScanException &ex) { return false; }";
00246   line() << "bool onIOError (ScanException &ex) { return false; }";
00247   line() << "bool wrap () { return false; }";
00248   line() << "void commonTokenAction (" << tSpec.className << " &tok) {}";
00249   unindent();
00250   unindent();
00251   line() << "};";
00252   line();
00253   
00254   line() << "class " << dfa.className << ": public "
00255          << dfa.className << "_base";
00256   if (!dfa.inheritance.empty()) {
00257     ofs << ", ";
00258     writeChunk(dfa.inheritance);
00259   }
00260   line() << "{";
00261   line() << "public:";
00262   indent();
00263   line();
00264 
00265   line() << "struct t_laCell";
00266   line() << "{";
00267   indent();
00268   line() << tSpec.className << " *token;";
00269   line() << "struct t_laCell *next;";
00270   line() << "t_laCell(" << tSpec.className
00271         << " *token_, struct t_laCell *next_) : token(token_), next(next_) {}";
00272   line() << "~t_laCell() { delete token; }";
00273   unindent();
00274   line() << "};";
00275   line();
00276   
00277   line() << dfa.className << " (istream *in_ = NULL)" << throwClause;
00278   line() << "    : _buffer(NULL), _reject(false), _more(false),";
00279   if (registry["DEBUG_SCANNER"])
00280     line() << "_bsz(5), _pbsz(5)";
00281   else
00282     line() << "_bsz(8 * 1024), _pbsz(100)";
00283   line() << "{";
00284   indent();
00285   line() << "switchToStream(in_);";
00286   line() << "lookahead = false;";
00287   line() << "la0 = _laBegin = _laEnd = new struct t_laCell(new " << tSpec.className << ", NULL);";
00288   line() << "la0->next = la0;";
00289   line() << "switchToState(START);";
00290   if (registry["DUMP_PROFILE"])
00291     line() << "_freqs_init();";
00292   unindent();
00293   line() << "}";
00294   line();
00295   if (registry["DUMP_PROFILE"]) {
00296     line();
00297     line() << "~" << dfa.className << "() { _freqs_dump(); }";
00298     line();
00299   }
00300   line() << "void rejectToken() {_reject = true; }";
00301   line();
00302   line() << "void switchToStream (istream *in)";
00303   line() << "{";
00304   indent();
00305   line() << "resetPos();";
00306   line() << "inputStream = in;";
00307   line() << "_binit();";
00308   unindent();
00309   line() << "}";
00310   line();
00311 
00312   line() << "class StreamState";
00313   line() << "{";
00314   indent();
00315   line() << "friend class " << dfa.className << ";";
00316   line() << "istream *inputStream;";
00317   line() << "char *_buffer, *_ch, *_start, *_eob;";
00318   if (registry["COUNT_COLUMNS"])
00319     line() << "char * _bol;";
00320   line() << "int _bsz, _pbsz;";
00321   line() << "bool _lta, _eof;";
00322   line() << "Position bPos, ePos;";
00323   line();
00324   line() << "StreamState (istream *inputStream_, char *_buffer_, int _bsz_,";
00325   if (registry["COUNT_COLUMNS"]) 
00326     line() << "            int _pbsz_, char *_start_, char *_bol_, char *_ch_,";
00327   else
00328     line() << "            int _pbsz_, char *_start_, char *_ch_,";
00329   line() << "             char *_eob_, bool _lta_, bool _eof_,";
00330   line() << "             Position &bPos_, Position &ePos_) :";
00331   line() << "  inputStream(inputStream_), _buffer(_buffer_), _bsz(_bsz_),";
00332   if (registry["COUNT_COLUMNS"]) 
00333     line() << "    _pbsz(_pbsz_), _start(_start_), _bol(_bol_), _ch(_ch_),";
00334   else
00335     line() << "   _pbsz(_pbsz_), _start(_start_), _ch(_ch_),";
00336   line() << "   _eob(_eob_), _lta(_lta_), _eof(_eof_),";
00337   line() << "   bPos(bPos_), ePos(ePos_)";
00338   line() << "{}";
00339   line();
00340   line() << "~StreamState () { if (_buffer != NULL) delete[] _buffer; }";
00341   unindent();
00342   line() << "};";
00343   line();
00344   
00345   line() << "StreamState* pushStream (istream *in)";
00346   line() << "{";
00347   indent();
00348   line() << "StreamState *res = new StreamState(inputStream, _buffer, _bsz,";
00349   if (registry["COUNT_COLUMNS"]) 
00350     line() << "                    _pbsz, _more ? _start : _ch, _bol, _ch,";
00351   else
00352     line() << "                          _pbsz, _more ? _start : _ch, _ch,";
00353   line() << "                                   _eob, _lta, _eof,";
00354   line() << "                                 _more ? bPos : ePos, ePos);";
00355   line() << "_buffer = NULL;";
00356   line() << "switchToStream(in);";
00357   line() << "return res;";
00358   unindent();
00359   line() << "}";
00360   line();
00361   
00362   line() << "void popStream (StreamState *s)";
00363   line() << "{";
00364   indent();
00365   line() << "if (_buffer != NULL) delete[] _buffer;";
00366   line() << "inputStream = s->inputStream;";
00367   line() << "_buffer = s->_buffer;";
00368   line() << "_bsz = s->_bsz;";
00369   line() << "_pbsz = s->_pbsz;";
00370   line() << "_start = s->_start;";
00371   if (registry["COUNT_COLUMNS"])
00372     line() << "_bol = s->_bol;";
00373   line() << "_ch = s->_ch;";
00374   line() << "_eob = s->_eob;";
00375   line() << "_lta = s->_lta;";
00376   line() << "_eof = s->_eof;";
00377   line() << "bPos = s->bPos;";
00378   line() << "ePos = s->ePos;";
00379   line() << "s->_buffer = NULL;";
00380   line() << "delete s;";
00381   unindent();
00382   line() << "}";
00383   line();
00384     
00385 
00386   line() << "istream& getInputStream ()";
00387   line() << "{";
00388   indent();
00389   line() << "return *inputStream;";
00390   unindent();
00391   line() << "}";
00392   line();
00393   
00394   line() << "int switchToState (int newState)";
00395   line() << "{";
00396   indent();
00397   line() << "int prevState = _state;";
00398   line() << "_state = newState;";
00399   line() << "return prevState;";
00400   unindent();
00401   line() << "}";
00402   line();
00403 
00404   line() << "int getState ()";
00405   line() << "{";
00406   indent();
00407   line() << "return _state;";
00408   unindent();
00409   line() << "}";
00410   line();
00411 
00412   line() << "int pushState (int newState)";
00413   line() << "{";
00414   indent();
00415   line() << "_stateStack.push(_state);";
00416   line() << "return switchToState(newState);";
00417   unindent();
00418   line() << "}";
00419   line();
00420 
00421   line() << "int popState ()";
00422   line() << "{";
00423   indent();
00424   line() << "int prevState = switchToState(_stateStack.top());";
00425   line() << "_stateStack.pop();";
00426   line() << "return prevState;";
00427   unindent();
00428   line() << "}";
00429   line();
00430   
00431   line() << "const Position& getCurrentPos ()";
00432   line() << "{";
00433   indent();
00434   line() << "return bPos;";
00435   unindent();
00436   line() << "}";
00437   line();
00438 
00439   line() << "void resetPos ()";
00440   line() << "{";
00441   indent();
00442   line() << "ePos.ln = bPos.ln = 1;";
00443   if (registry["COUNT_COLUMNS"]) {
00444     line() << "bPos.col = ePos.col = 1;";
00445     line() << "_bol = _ch;";
00446   }
00447   unindent();
00448   line() << "}";
00449   line();
00450   
00451   line() << "int newLine ()";
00452   line() << "{";
00453   indent();
00454   if (registry["COUNT_COLUMNS"]) {
00455     line() << "_bol = _ch;";
00456     line() << "ePos.col = 1;";
00457   }
00458   line() << "return ePos.ln++;";
00459   unindent();
00460   line() << "}";
00461   line();
00462 
00463   line() << "bool lookingAhead ()";
00464   line() << "{";
00465   indent();
00466   line() << "return lookahead;";
00467   unindent();
00468   line() << "}";
00469   line();
00470   
00471   
00472   line() << "int getChar ()" << throwClause;
00473   line() << "{";
00474   indent();
00475   line() << "for(;;) {";
00476   indent();
00477   line() << "unsigned int c = *_ch++;";
00478   line() << "if (_ch == _eob) {";
00479   indent();
00480   line() << "_ch--;";
00481   line() << "if (_eof && !wrap()) return EOF;";
00482   line() << "else _brefill();";
00483   unindent();
00484   line() << "} else {";
00485   indent();
00486   line() << "if (_start == _ch - 1) _start = _ch;";
00487   line() << "return c;";
00488   unindent();
00489   line() << "}";
00490   unindent();
00491   line() << "}";
00492   unindent();
00493   line() << "}";
00494   line();
00495 
00496   line() << "void unGetChars (const char *c, int n);";
00497   
00498   line() << "void unGetChar (char c)";
00499   line() << "{";
00500   indent();
00501   line() << "unGetChars(&c, 1);";
00502   unindent();
00503   line() << "}";
00504   line();
00505 
00506   line() << "void unGetChars (const string &s)";
00507   line() << "{";
00508   indent();
00509   line() << "unGetChars(s.data(), s.length());";
00510   unindent();
00511   line() << "}";
00512   line();
00513 
00514   line() << "void unGetChars (const char *s)";
00515   line() << "{";
00516   indent();
00517   line() << "unGetChars(s, strlen(s));";
00518   unindent();
00519   line() << "}";
00520   line();
00521   
00522   line() << tSpec.className << "* la (int k)" << throwClause;
00523   line() << "{";
00524   indent();
00525   line() << "struct t_laCell *p, *q;";
00526   line() << "for (p = la0; k >= 0; k--, q = p, p = p->next) {";
00527   indent();
00528   line() << "if (p == _laEnd) {";
00529   indent();
00530   line() << "if (_laEnd->next == _laBegin)";
00531   indent();
00532   line() << "_laEnd->next = new struct t_laCell(new " << tSpec.className
00533          << "(), _laBegin);";
00534   unindent();
00535   line() << "_scan(_laEnd->token);";
00536   line() << "_laEnd = _laEnd->next;";
00537   unindent();
00538   line() << "}";
00539   unindent();
00540   line() << "}";
00541   line() << "return q->token;";
00542   unindent();
00543   line() << "}";
00544   line();
00545 
00546   line() << tSpec.className << "* la ()" << throwClause;
00547   line() << "{";
00548   indent();
00549   line() << "if (la0 == _laEnd) {";
00550   indent();
00551   line() << "if (_laEnd->next == _laBegin)";
00552   indent();
00553   line() << "_laEnd->next = new struct t_laCell(new " << tSpec.className
00554          << "(), _laBegin);";
00555   unindent();
00556   line() << "struct t_laCell *x = _laEnd;";
00557   line() << "_laEnd = _laEnd->next;";
00558   line() << "_scan(x->token);";
00559   unindent();
00560   line() << "}";
00561   line() << "return la0->token;";
00562   unindent();
00563   line() << "}";
00564   line();
00565   
00566   
00567   line() << "void consume ()" << throwClause;
00568   line() << "{";
00569   indent();
00570   line() << "la();"; // make sure we have a token there
00571   line() << "la0 = la0->next;";
00572   line() << "if (!lookahead)";
00573   indent();
00574   line() << "_laBegin = _laBegin->next;";
00575   unindent();
00576   unindent();
00577   line() << "}";
00578   line();
00579 
00580   // this one is actually used by the parser. It relies on the parser's code
00581   // to ensure that the queue contains a token and also to delete the returned
00582   // token. 
00583   line() << "void unchecked_consume ()";
00584   line() << "{";
00585   indent();
00586   line() << "la0 = la0->next;";
00587   line() << "if (!lookahead)";
00588   indent();
00589   line() << "_laBegin = _laBegin->next;";
00590   unindent();
00591   unindent();
00592   line() << "}";
00593   line();
00594 
00595   line() << "void setMarker ()";
00596   line() << "{";
00597   indent();
00598   line() << "lookahead = true;";
00599   line() << "laMarkers.push(la0);";
00600   unindent();
00601   line() << "}";
00602   line();
00603 
00604   line() << "void rewindToMarker ()";
00605   line() << "{";
00606   indent();
00607   line() << "la0 = laMarkers.top();";
00608   line() << "laMarkers.pop();";
00609   line() << "if (laMarkers.empty()) {";
00610   indent();
00611   line() << "lookahead = false;";
00612   line() << "_laBegin = la0;";
00613   unindent();
00614   line() << "}";
00615   unindent();
00616   line() << "}";
00617   line();
00618   
00619   //  line() << "
00620 
00621   line();
00622   for (int i = 0; i < dfa.states.size(); i++)
00623     line() << "static const int " << dfa.states[i]->name << ";";
00624   line();
00625   
00626   unindent();
00627   
00628   line() << "protected: // the ugly stuff";
00629   line();
00630   indent();
00631   line() << "void _scan(" << tSpec.className
00632          << " *token)" << throwClause <<";";
00633 
00634   line() << "Position bPos, ePos;";
00635   line() << "istream *inputStream;";
00636   line();
00637   
00638   line() << "bool lookahead;";
00639   line() << "struct t_laCell *la0, *_laBegin, *_laEnd;";
00640   line() << "stack<struct t_laCell*> laMarkers;";
00641   line() << "int _state;";
00642   line() << "stack<int> _stateStack;";
00643   line() << "bool _reject;";
00644   line();
00645   
00646   line() << "void _brefill ()" << throwClause << ";";
00647   line() << "void _binit()" << throwClause << ";";   
00648   line() << "int _bsz; //buffer size";
00649   line() << "int _pbsz; //extra bytes for pushback";
00650   // our internal buffer. _bsz are "normal characters", and we need an extra
00651   // byte to store the '\0' character that tells us we reached the buffer's
00652   // end.
00653   line() << "char *_buffer;";
00654   // _start points to the first char of the current token
00655   // _ch is the current char
00656   // _eob points one past the last character in the buffer (i.e. to the '\0' we
00657   // appended).
00658   line() << "char *_ch; // current char";
00659   line() << "char *_eob; // one past the last char in the buffer";
00660   line() << "char *_start; // points ot the token's start";
00661   line() << "bool _eof; // eof seen during _brefill";
00662   line() << "bool _lta; // last token in the stream was accepted";
00663   line() << "bool _more; // true in token actions of MORE tokens";
00664   if (registry["COUNT_COLUMNS"])
00665     line() << "char * _bol; // _ch - _bol = col increment.";
00666   line();
00667   unindent();
00668   if (registry["DUMP_PROFILE"])
00669   {
00670     line() << "private: // DFA profiling stuff";
00671     line();
00672     indent();
00673     line() << "unsigned int **_freqs[" << sdp.size() << "];";
00674     line() << "unsigned int *_freqs_sz[" << sdp.size() << "];";
00675     line();
00676     line() << "void _freqs_init ();";
00677     line();
00678     line() << "void _freqs_dump ();";
00679     line();
00680     line() << "void _bump_counter (int lState, int state, int transition);";
00681     unindent();
00682     line();
00683   }
00684   if (!dfa.userCode.empty()) {
00685     line() << "private: // user code";
00686     line();
00687     for (int i = 0; i < dfa.userCode.size(); i++)
00688       writeChunk(dfa.userCode[i]);
00689     line();
00690   }
00691   line() << "}; // end class " << dfa.className;
00692   line();
00693   line();
00694   line() << "} //end namespace cppcc" << endl;
00695   line() << "#endif // ifndef " << includeMacro;
00696   
00697   closeStream();
00698 
00700   // scanner's .cc
00701 
00702   openStream(fullPath(className2cc(dfa.className)));
00703   line();
00704   line() << "#include \"" << className2hh(dfa.className) << "\"";
00705   line();
00706   for (int i = 0; i < dfa.states.size(); i++)
00707     line() << "const int cppcc::" << dfa.className << "::" <<
00708       dfa.states[i]->name << " = " << i << ";";
00709   line();
00710 
00711   line() << "void cppcc::" << dfa.className << "::_brefill ()" << throwClause;
00712   line() << "{";
00713   indent();
00714   if (registry["DEBUG_SCANNER"])
00715       line() << "cerr << \"refilling buffer.\" << endl;";
00716   line() << "if (!*inputStream) {";
00717   indent();
00718   writeExceptionCode("Input error.", "onIOError", false);
00719   unindent();
00720   line() << "}";
00721   line() << "for (struct t_laCell *p = _laBegin; p != _laEnd; p = p->next)";
00722   indent();
00723   line() << "p->token->_cacheImg();";
00724   unindent();
00725   line() << "if (_start != _ch) {";
00726   indent();
00727   line() << "if (_start <= _buffer + _pbsz) {";
00728   indent();
00729   line() << "int nbsz = _pbsz;";
00730   line() << "do { nbsz *= 2; } while (nbsz <= _bsz + _pbsz);";
00731   if (registry["DEBUG_SCANNER"])
00732     line() << "cerr << \"growing buffer from \" << _bsz << \" to \" << nbsz"
00733       "<< endl;";
00734   line() << "_bsz = nbsz;";
00735   line() << "char *newBuf = new char[_bsz + _pbsz + 1];";
00736   line() << "memcpy(newBuf + _pbsz, _start, _ch - _start);";
00737   if (registry["COUNT_COLUMNS"])
00738     line() << "_bol += newBuf - _buffer;";
00739   line() << "_ch += newBuf - _buffer;";
00740   line() << "_start = newBuf + _pbsz;";
00741   line() << "delete[] _buffer;";
00742   line() << "_buffer = newBuf;";
00743   unindent();
00744   line() << "} else {";
00745   indent();
00746   line() << "memmove(_buffer + _pbsz, _start, _ch - _start);";
00747   if (registry["COUNT_COLUMNS"]) line() << "_bol -= _start - (_buffer + _pbsz);";
00748   line() << "_ch = _buffer + _pbsz + (_ch - _start);";
00749   line() << "_start = _buffer + _pbsz;";
00750   unindent();
00751   line() << "}";
00752   unindent();
00753   line() << "} else {";
00754   indent();
00755   line() << "_start = _ch = _buffer + _pbsz;";
00756   if (registry["COUNT_COLUMNS"])
00757     line() << "_bol = _start;";
00758   unindent();
00759   line() << "}";
00760   line() << "inputStream->read(_ch, _bsz - (_ch - (_buffer + _pbsz)));";
00761   line() << "_eob = _ch + inputStream->gcount();";
00762   line() << "*_eob++  = \'\\0\';";
00763   line() << "_eof = inputStream->eof();";
00764   line() << "_lta = false;";
00765   unindent();
00766   line() << "}";
00767   line();
00768   line() << "void cppcc::" << dfa.className << "::_binit ()" << throwClause;
00769   line() << "{";
00770   indent();
00771   line() << "if (_buffer == NULL) _buffer = new char[_bsz + _pbsz + 1];";
00772   line() << "_start = _ch = _eob = _buffer + _pbsz;";
00773   line() << "*_eob++  = \'\\0\';";
00774   line() << "_eof = inputStream == NULL;";
00775   unindent();
00776   line() << "}";
00777     
00778 
00779   line() << "void cppcc::" << dfa.className
00780          << "::unGetChars (const char *c, int n)";
00781   line() << "{";
00782   /* ok, since we cache the token images anyway this stuff can be greatly
00783   simplified. Once i'm convinced the OWN_STRINGS is dead and gone i can get rid
00784          of most of this stuff (cause i no longer need to preserve the text
00785          before _ch).
00786   */
00787   indent();
00788   line () << "if (_start < _buffer + n) { //grow the buffer";
00789   indent();
00790   line() << "int npbsz = _pbsz, delta = 0;";
00791   line() << "for (struct t_laCell *p = _laBegin; p != _laEnd; p = p->next)";
00792   indent();
00793   line() << "p->token->_cacheImg();";
00794   unindent();
00795   line() << "do { npbsz *= 2; delta = npbsz - _pbsz; } while (delta < n);";
00796   if (registry["DEBUG_SCANNER"])
00797     line() << "cerr << \"growing pushback area from \" << _pbsz"
00798       " << \" to \" << npbsz << endl;";
00799   line() << "char *newBuf = new char[_bsz + npbsz + 1];";
00800   line() << "memcpy(newBuf + delta, _start, _eob - _start + 1);";
00801   if (registry["COUNT_COLUMNS"])
00802     line() << "_bol += newBuf + delta - _start;";
00803   line() << "_ch += newBuf + delta - _start;";
00804   line() << "_eob += newBuf + delta - _start;";
00805   line() << "_start = newBuf + delta;";
00806   line() << "_pbsz = npbsz;";
00807   line() << "delete[] _buffer;";
00808   line() << "_buffer = newBuf;";
00809   unindent();
00810   line() << "}";
00811   line() << "memmove(_start - n, _start, n);";
00812   line() << "_ch -= n;";
00813   line() << "_start -= n;";
00814   if (registry["COUNT_COLUMNS"])
00815     line() << "_bol -= n;";
00816   line() << "memcpy(_ch, c, n);";
00817   unindent();
00818   line() << "}";
00819   line();
00820   
00821   if (registry["DUMP_PROFILE"])
00822   {
00823     line();
00824     line() << "void cppcc::" << dfa.className << "::_freqs_init ()";
00825     line() << "{";
00826     indent();
00827     for (int i = 0; i < sdp.size() ; i++) {
00828       line() << "_freqs[" << i << "] = new (unsigned int *)[" << sdp[i].size()
00829              << "];";
00830       line() << "_freqs_sz[" << i << "] = new unsigned int [" << sdp[i].size()
00831              << "];";
00832       for (int j = 0; j < sdp[i].size(); j++) {
00833         line() << "_freqs[" << i << "][" << j << "] = new unsigned int["
00834                << sdp[i][j].size() << "];";
00835         line() << "_freqs_sz[" << i << "][" << j << "] = " << sdp[i][j].size()
00836                << ";";
00837         line() << "for (int i = 0; i < " << sdp[i][j].size() << "; i++)";
00838         line() << "  _freqs[" << i << "][" << j << "][i] = 0;";
00839       }
00840           
00841     }
00842     unindent();
00843     line() << "}";
00844     line();
00845     line() << "void cppcc::" << dfa.className << "::_freqs_dump ()";
00846     line() << "{";
00847     indent();
00848     line() << "ofstream ofs(\"" << registry["PROFILING_FILE"] << "\");";
00849     for (int i = 0; i < sdp.size(); i++) {
00850       for (int j = 0; j < sdp[i].size(); j++) {
00851         line() << "for (unsigned int i = 0; i < _freqs_sz["
00852                << i << "][" << j << "]; i++)";
00853         line() << "  ofs << _freqs[" << i << "][" << j << "][i] << \" \";";
00854         line() << "ofs << endl;";
00855       }
00856       line() << "ofs << endl; ";
00857     }
00858     unindent();
00859     line() << "}";
00860     line();
00861     line() << "void cppcc::" << dfa.className
00862            << "::_bump_counter (int lState, int state, int transition)";
00863     line() << "{";
00864     indent();
00865     line() << "if (_freqs[lState][state][transition] ==  UINT_MAX) {";
00866     line() << "for (unsigned int i = 0; i < _freqs_sz[lState][state]; i++)";
00867     line() << "  _freqs[lState][state][i] >>= 1;";
00868     line() << "}";
00869     line() << "_freqs[lState][state][transition]++;";
00870     unindent();
00871     line() << "}";
00872     line();
00873   }
00874   
00875   line() << "void cppcc::" << dfa.className << "::_scan (" << tSpec.className
00876          << " *token)" << throwClause;
00877   line() << "{";
00878   indent();
00879   line() << "register unsigned char c;";
00880   line();
00881   
00882   line() << "for(;;)";
00883   line() << "{";
00884   indent();
00885 
00886   // the DFAs:
00887   
00888   line() << "switch (getState())";
00889   line() << "{";
00890   for (int i = 0; i < dfa.states.size(); i++)
00891   {
00892     line() << "case " << dfa.states[i]->name << ":";
00893     indent();
00894     writeDfa(*dfa.states[i], sdp[i]);
00895     unindent();
00896   }
00897 
00898   // default handler if lexical state went to nowhere
00899   line() << "default:";
00900   indent();
00901   line() << "{";
00902   indent();
00903   writeExceptionCode("Illegal lexical state", "onScanError");
00904   unindent();
00905   line() << "}";
00906   unindent();
00907   
00908   line() << "}"; // switch (getCurState())
00909 
00910   // code for accepting states/error:
00911   line();
00912   for (int t = 0; t < tSpec.count(); t++)
00913   {
00914     if (tSpec[t].kind() == ITokenSpec::special) continue;
00915     
00916     line() << "__accept_token_" << tSpec[t].name() << "_:";
00917     indent();
00918 
00919     if (registry["DEBUG_SCANNER"]) {
00920       line() << "{";
00921       indent();
00922       line() << "char x = *_ch;";
00923       line() << "*_ch = \'\\0\';";
00924       line() << "cerr << \"Accepted a " << tSpec[t].name()
00925              << "(\" << " << t << " << \") token at position \" << bPos.ln"
00926              << (registry["COUNT_COLUMNS"] ? " << \",\" << bPos.col" : "")
00927              << " << \" up to \" << ePos.ln"
00928              << (registry["COUNT_COLUMNS"] ? " << \",\" << ePos.col + (_ch - _bol)" : "")
00929              << " << \" image: \\\"\" << _start << \"\\\".\" << endl;";
00930       line() << "*_ch = x;";
00931       unindent();
00932       line() << "}";
00933     }
00934 
00935     switch (tSpec[t].kind())
00936     {
00937     case ITokenSpec::skip:
00938       if (registry["COUNT_COLUMNS"]) {
00939         line() << "ePos.col += _ch - _bol;";
00940         line() << "_bol = _ch;";
00941       }
00942       if (!tSpec[t].tokAction().empty()) {
00943         line() << "token->_set(" << tSpec.className << "::" << tSpec[t].name()
00944                << ", bPos, ePos, _start, _ch - _start);";
00945         line() << "{";
00946         writeChunk(tSpec[t].tokAction());
00947         line() << "}";
00948       }
00949       line() << "_start = _ch;";
00950       line() << "bPos = ePos;";
00951       line() << "continue;";
00952       break;
00953     case ITokenSpec::keyword:
00954       line() << "{";
00955       indent();
00956       line() << "token->id = " << tSpec.className << "::" << tSpec[t].name()
00957              << ";";
00958       if (registry["COUNT_COLUMNS"]) {
00959         line() << "ePos.col += _ch - _bol;";
00960         line() << "_bol = _ch;";
00961       }
00962       line() << "token->bPos = bPos;";
00963       line() << "token->ePos = ePos;";
00964       if (!tSpec[t].tokAction().empty()) {
00965         line() << "token->_set(_start, _ch - _start);";
00966         line() << "{";
00967         writeChunk(tSpec[t].tokAction());
00968         line() << "}";
00969       }
00970       line() << "commonTokenAction (*token);";
00971       line() << "bPos = ePos;";
00972       line() << "_start = _ch;";
00973       line() << "if (_reject) { _reject = false; continue; }";
00974       line() << "else return;";
00975       unindent();
00976       line() << "}";
00977       break;
00978     case ITokenSpec::regular:
00979       line() << "{";
00980       indent();
00981       if (registry["COUNT_COLUMNS"]) {
00982         line() << "ePos.col += _ch - _bol;";
00983         line() << "_bol = _ch;";
00984       }
00985       line() << "token->_set(" << tSpec.className << "::" << tSpec[t].name()
00986              << ", bPos, ePos, _start, _ch - _start);";
00987       if (!tSpec[t].tokAction().empty()) {
00988         line() << "{";
00989         writeChunk(tSpec[t].tokAction());
00990         line() << "}";
00991       }
00992       line() << "commonTokenAction (*token);";
00993       line() << "bPos = ePos;";
00994       line() << "_start = _ch;";
00995       line() << "if (_reject) { _reject = false; continue; }";
00996       line() << "else return;";
00997       unindent();
00998       line() << "}";
00999       break;
01000     case ITokenSpec::more:
01001       if (!tSpec[t].tokAction().empty()) {
01002         if (registry["COUNT_COLUMNS"]) {
01003           line() << "ePos.col += _ch - _bol;";
01004           line() << "_bol = _ch;";
01005         }
01006         line() << "token->_set(" << tSpec.className << "::" << tSpec[t].name()
01007                << ", bPos, ePos, _start, _ch - _start);";
01008         line() << "_more = true;";
01009         line() << "{";
01010         writeChunk(tSpec[t].tokAction());
01011         line() << "}";
01012         line() << "_more = false;";
01013       }
01014       line() << "continue;";
01015     }
01016     unindent();
01017   }
01018   line() << "__scan_error_:";
01019   indent();
01020   line() << "{";
01021   indent();
01022 #ifdef DEBUG
01023   line() << "cerr << \" unexpected: 0x\" << hex << (int) c << endl;";
01024 #endif
01025   line() << "string msg = string(\"Unexpected character \\\'\") + (char) c + \"\\\'.\";";
01026   writeExceptionCode("msg", "onScanError", true);
01027   unindent();
01028   line() << "}";
01029   unindent();
01030 
01031   line() << "__unexpected_eof_:";
01032   indent();
01033   line() << "{";
01034   indent();
01035   line() << "string msg(\"Unexpected EOF.\");";
01036   writeExceptionCode("msg", "onScanError", true);
01037   unindent();
01038   line() << "}";
01039 
01040   line() << "__eof_seen_:";
01041   indent();
01042   line() << "token->id = " << tSpec.className << "::eof;";
01043   line() << "token->bPos = bPos;";
01044   line() << "token->ePos = ePos;";
01045   line() << "return;";
01046   unindent();
01047 
01048   unindent();  
01049   unindent();
01050   line() << "}"; // for (;;)
01051   unindent();
01052   line() << "}"; // _scan()
01053   line();
01054   closeStream();
01055 }
01056 
// Destination state number -> the input characters that lead to it.
typedef std::map<int, std::vector<unsigned char> > GoToMap;

// A GoToMap entry (second) together with the weight it is ranked by (first).
typedef std::pair<unsigned long int, GoToMap::iterator> Wtrans;

/*
 * "Heavier first" ordering for weighted transitions: a sorts before b when
 * its weight is strictly greater. Only the weight takes part in the
 * comparison, so equal-weight entries compare equivalent (a stable sort
 * keeps their original relative order).
 *
 * The functor no longer derives from binary_function (removed from the
 * standard library in C++17); the typedefs that base used to provide are
 * declared here directly so adaptor-style code keeps working. The call
 * operator is const so the comparator can be used through const objects.
 */
struct t_Wgtr
{
  typedef Wtrans first_argument_type;
  typedef Wtrans second_argument_type;
  typedef bool result_type;

  bool operator () (const Wtrans &a, const Wtrans &b) const
  {
    return a.first > b.first;
  }
};
01066       
01067 void ScannerDfaWriter::writeDfa (BasicDfaSpec &dfa, BasicDfaProfile &sdp)
01068 {
01069   for (int s = 0; s < dfa.states.size(); s++)
01070   {
01071     line() << "__" << dfa.name << "_" << s << "_:";
01072     indent();
01073     // regroup the transitions table by next state
01074     
01075     GoToMap goTo;
01076     for (int t = 0; t < dfa.states[s].transitions.size(); t++)
01077     {
01078       goTo[dfa.states[s].transitions[t].to].
01079                         push_back(dfa.states[s].transitions[t].on);
01080     }  
01081 
01082     // WeightedTrans will contain pairs consisting of an interator into the
01083     // goTo map (aka pointer to a transition from the current state) and its
01084     // weight or frequency as read from the profiling file. We then order this
01085     // list by the frequencies and generate code such that the most frequent
01086     // transitions are placed at the beginning.
01087     vector<Wtrans> weightedTrans;
01088 
01089     {
01090       int k = 0;
01091       for (GoToMap::iterator i = goTo.begin(); i != goTo.end(); i++, k++)
01092         weightedTrans.push_back(Wtrans(sdp[s][k], i));
01093 
01094       stable_sort<vector<Wtrans>::iterator, t_Wgtr>(weightedTrans.begin(),
01095                                                     weightedTrans.end(),
01096                                                     struct t_Wgtr());
01097     }
01098         
01099     // dump transitions:
01100     int transNo = 0;
01101     for (vector<Wtrans>::iterator i = weightedTrans.begin();
01102          i != weightedTrans.end(); i++, transNo++)
01103     {
01104       if (i != weightedTrans.begin()) line() << "else ";
01105       else {
01106         line() << "c = *_ch++;";
01107         line();
01108       }
01109       Wtrans &wtr = *i;
01110       unsigned long int weight = wtr.first;
01111       ofs << " /*" << weight << "*/ if (";
01112       int nextState = (*wtr.second).first;
01113       vector<unsigned char> &on = (*wtr.second).second;
01114       sort(on.begin(), on.end());
01115       int j = 0;
01116       j = writeCharCompare(on, j);
01117       while (j < on.size())
01118       {
01119         ofs << " || ";
01120         j = writeCharCompare(on, j);
01121       }
01122       ofs << ")";
01123       indent();
01124       line();
01125       if (registry["DUMP_PROFILE"])
01126         ofs << "{ _bump_counter(" << dfa.name << ", " << s << ", "
01127             << transNo << "); ";
01128       ofs << "goto __" << dfa.name << "_" << nextState << "_;";
01129       if (registry["DUMP_PROFILE"])
01130         ofs << " }";
01131       unindent();
01132     }
01133     if (!goTo.empty())
01134     {
01135       line() << "if ((_ch-- == _eob) && (c == \'\\0\')) {";
01136       indent();
01137       line() << "if (_eof) {";
01138       indent();
01139       if (!registry["TOKENS_SPAN_EOF"]) {
01140         line() << "if ((_ch != _start) && !_lta)";
01141         if (dfa.states[s].isFinal)
01142           ofs << "{ _lta = true; goto __accept_token_"
01143               << tSpec[dfa.states[s].tokId].name() << "_; }";
01144         else ofs << "goto __unexpected_eof_;";
01145       }
01146       line() << "if (!wrap()) goto __eof_seen_;";
01147       unindent();
01148       line() << "}";
01149       line() << "if (_ch + 1 == _eob) _brefill();";
01150       line() << "goto __" << dfa.name << "_" << s << "_;";
01151       unindent();
01152       line() << "}";
01153     }
01154     // dump what do we do if no transition was taken:
01155     if (dfa.states[s].isFinal) {
01156       // weppeeeee just found a token !
01157       line() << "goto __accept_token_"
01158              << tSpec[dfa.states[s].tokId].name() << "_;";
01159     } else {
01160       // uh oh...
01161       line() << "goto __scan_error_;";
01162     }
01163     unindent();
01164   }
01165 }
01166 
/*
 * Renders a character code as C++ source text: a character literal followed
 * by a comment holding the decimal code, e.g.  'a' /*97* /  (without the
 * space).
 *
 *   - visible ASCII characters (0x21..0x7E) are written as themselves, with
 *     a backslash in front of ' and \ ;
 *   - space is written as itself;
 *   - \n \t \v \b \r \f use their usual escape sequences;
 *   - every other code is written as an octal escape.
 *
 * The "visible" test is an explicit range check rather than isgraph(): it
 * gives the same answer as isgraph() in the "C" locale, but does not vary
 * with the host locale and stays well defined for codes above UCHAR_MAX.
 *
 * c  the character code to render.
 * Returns the literal plus the trailing comment.
 */
std::string formatChar (unsigned int c)
{
  std::ostringstream oss;
  oss << '\'';

  const bool visible = (c > 0x20u) && (c < 0x7Fu);

  if (visible) {
    // only the quote and the backslash need protecting inside '...'
    if (c == '\'' || c == '\\') oss << '\\';
    oss << static_cast<char>(c);
  } else {
    switch (c)
    {
    case ' ':  oss << ' ';    break;
    case '\n': oss << "\\n";  break;
    case '\t': oss << "\\t";  break;
    case '\v': oss << "\\v";  break;
    case '\b': oss << "\\b";  break;
    case '\r': oss << "\\r";  break;
    case '\f': oss << "\\f";  break;
    default:
      // octal escape; switch the stream back to decimal for the comment
      oss << '\\' << std::oct << c << std::dec;
    }
  }

  oss << "' /*" << c << "*/";
  return oss.str();
}
01197 
01198 int ScannerDfaWriter::writeCharCompare (vector<unsigned char> v, int i)
01199 {
01200   int j = i + 1;
01201   unsigned char first = v[i];
01202   unsigned char last = v[i];
01203   while ((j < v.size()) && (v[j] == last + 1)) last = v[j++];
01204 
01205   if (first == last) {
01206     if (first == 0)
01207       ofs << "((c == \'\\0\') && (_ch != _eob))";
01208     else
01209       ofs << "(c == " << formatChar(first) << ")";
01210     
01211   } else {
01212     if (first == 0) {
01213       ofs << "((c == \'\\0\') && (_ch != _eob)) || ";
01214       first++;
01215     }
01216     if (last == 255)
01217       ofs << "("<< formatChar(first) << " <= c)";
01218     else 
01219       ofs << "((" << formatChar(first) << " <= c) && (c <= "
01220           << formatChar(last) << "))";
01221   }
01222   
01223   return j;
01224 }
01225 
01226 
01227 void ScannerDfaWriter::writeExceptionCode (const string &what,
01228                                            const string &handlerName,
01229                                            bool isName)
01230 {
01231   if (isName) {
01232     line() << "ScanException __ex(ePos, " << what << ");";
01233   } else {
01234     line() << "ScanException __ex(ePos, \"" << what << "\");";
01235   }
01236   if (registry["DEBUG_SCANNER"])
01237     line() << "cerr << \"Scanner exception: \" << __ex.what() << endl;";
01238   line() << "if (!" << handlerName << "(__ex))";
01239   indent();
01240   if (registry["USE_EXCEPTIONS"]) line() << "throw __ex;";
01241   else line() << "abort();";
01242   unindent();
01243 }

Generated at Tue Jul 9 21:05:46 2002 for CppCC by doxygen 1.2.8.1, written by Dimitri van Heesch, © 1997-2001