Main Page   Modules   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

scanner_spec.cc

Go to the documentation of this file.
00001 /*
00002  *  File:       scanner_spec.cc
00003  *              $Id: scanner_spec.cc,v 1.9 2002/06/13 11:42:21 alec Exp $
00004  *
00005  *  Author:     Alec Panoviciu (alecu@email.com)
00006  * 
00007  *  Comments:
00008  *
00009  *  Revision history:
00010  *
00011  *  $Log: scanner_spec.cc,v $
00012  *  Revision 1.9  2002/06/13 11:42:21  alec
00013  *  added #line stuff
00014  *
00015  *  Revision 1.8  2002/05/22 01:43:00  alec
00016  *  *** empty log message ***
00017  *
00018  *  Revision 1.7  2002/05/08 10:36:53  alec
00019  *  added keyword tokens support
00020  *
00021  *  Revision 1.6  2002/05/07 10:02:18  alec
00022  *  fixed some bugs & mem leaks; added MORE tokens support
00023  *
00024  *  Revision 1.5  2002/05/04 17:39:22  alec
00025  *  the scanner works (slightly tested)
00026  *
00027  *  Revision 1.4  2002/04/29 17:55:41  alec
00028  *  regexps almost done
00029  *
00030  *  Revision 1.3  2002/04/29 09:34:10  alec
00031  *  scanner ptree building compiles
00032  *
00033  */
00034 
00035 /*
00036   Copyright (C) 2002 Alexandru Panoviciu (alecu@email.com)
00037 
00038   This program is free software; you can redistribute it and/or modify
00039   it under the terms of the GNU General Public License as published by
00040   the Free Software Foundation; either version 2 of the License, or
00041   (at your option) any later version.
00042 
00043   This program is distributed in the hope that it will be useful,
00044   but WITHOUT ANY WARRANTY; without even the implied warranty of
00045   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00046   GNU General Public License for more details.
00047 
00048   You should have received a copy of the GNU General Public License
00049   along with this program; if not, write to the Free Software
00050   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00051 
00052  */
00053 
00054 #include <algorithm>
00055 
00056 #include "debug.h"
00057 #include "scanner_spec.hh"
00058 #include "dfa_re_node_builder.hh"
00059 #include "itoken_spec.hh"
00060 
00061 ScannerSpec::ScannerSpec (ITokenSpec &tokens_, PropRegistry &registry) :
00062   nodeBuilder(*new DfaReNodeBuilder(registry)),
00063   tokens(tokens_),
00064   starRe(NULL)
00065 {
00066   // EOF has always id 0; we add it as a SPECIAl token (no regexp).
00067   tokens.addToken("eof", Position());
00068 }
00069 
00070 
00071 ScannerSpec::~ScannerSpec ()
00072 {
00073   delete &nodeBuilder;
00074 }
00075 
00076 
00077 IReNodeBuilder& ScannerSpec::getReNodeBuilder ()
00078 {
00079   return nodeBuilder;
00080 }
00081 
00082 
00083 void ScannerSpec::addRegToken (const vector<string> &states,
00084                                const string &name, ReNode *regexp,
00085                                const string &tokenAction,
00086                                const Position &pos)
00087   throw (ParseException)
00088 {
00089   if (tokens.isToken(name))
00090     throw ParseException(pos, "Duplicate token name.");
00091 
00092   regexp = nodeBuilder.createCatNode(regexp,
00093                                      nodeBuilder.createEotNode(tokens.count(),
00094                                                                pos),
00095                                      pos);
00096   tokens.addToken(name, ITokenSpec::regular, regexp, tokenAction, pos);
00097   updateStates(states, dynamic_cast<DfaSourceRe*>(regexp));
00098 }
00099 
00100 
00101 void ScannerSpec::addSkipToken (const vector<string> &states,
00102                                 const string &name, ReNode *regexp,
00103                                 const string &tokenAction,
00104                                 const Position &pos)
00105   throw (ParseException)
00106 {
00107   if (tokens.isToken(name))
00108     throw ParseException(pos, "Duplicate token name.");
00109 
00110   regexp = nodeBuilder.createCatNode(regexp,
00111                                nodeBuilder.createEotNode(tokens.count(), pos),
00112                                      pos);
00113   tokens.addToken(name, ITokenSpec::skip, regexp, tokenAction, pos);
00114 
00115   updateStates(states, dynamic_cast<DfaSourceRe*>(regexp));
00116 }
00117 
00118 
00119 void ScannerSpec::addMoreToken (const vector<string> &states,
00120                                 const string &name, ReNode *regexp,
00121                                 const string &tokenAction,
00122                                 const Position &pos)
00123   throw (ParseException)
00124 {
00125   if (tokens.isToken(name))
00126     throw ParseException(pos, "Duplicate token name.");
00127 
00128   regexp = nodeBuilder.createCatNode(regexp,
00129                                nodeBuilder.createEotNode(tokens.count(), pos),
00130                                      pos);
00131   tokens.addToken(name, ITokenSpec::more, regexp, tokenAction, pos);
00132 
00133   updateStates(states, dynamic_cast<DfaSourceRe*>(regexp));
00134 }
00135 
00136 
00137 void ScannerSpec::addKeywordToken (const vector<string> &states,
00138                                    const string &name, ReNode *regexp,
00139                                    const string &tokenAction,
00140                                    const Position &pos)
00141   throw (ParseException)
00142 {
00143   if (tokens.isToken(name))
00144     throw ParseException(pos, "Duplicate token name.");
00145 
00146   regexp = nodeBuilder.createCatNode(regexp,
00147                                nodeBuilder.createEotNode(tokens.count(), pos),
00148                                      pos);
00149   tokens.addToken(name, ITokenSpec::keyword, regexp, tokenAction, pos);
00150 
00151   updateStates(states, dynamic_cast<DfaSourceRe*>(regexp));
00152 }
00153 
00154   
00155 void ScannerSpec::addSpecialToken (const string &name, const Position &pos)
00156   throw (ParseException)
00157 {
00158   if (tokens.isToken(name))
00159     throw ParseException(pos, "Duplicate token name.");
00160 
00161   tokens.addToken(name, pos);
00162 }
00163 
00164 
00165 void ScannerSpec::setPreambleCode (const string &block, const Position &pos)
00166 {
00167   preambleCode.code = block;
00168   preambleCode.pos = pos;
00169 }
00170 
00171 
00172 void ScannerSpec::addCodeBlock (const string &block, const Position &pos)
00173 {
00174   userCode.push_back(CodeChunk(pos, block));
00175 }
00176 
00177 
00178 void ScannerSpec::setInheritance (const string &inheritance_,
00179                                   const Position &pos)
00180 {
00181   inheritance.pos = pos;
00182   inheritance.code = inheritance_;
00183 }
00184 
00185 
00186 void ScannerSpec::setClassName (const string &name_)
00187 {
00188   className = name_;
00189 }
00190 
00191 
00192 bool operator == (const LexicalStateSpec &st, const string &s)
00193 {
00194   return st.name == s;
00195 }
00196 
00197 
00198 void ScannerSpec::updateStates (const vector<string> &tokStates,
00199                                 DfaSourceRe *regexp)
00200 {
00201   /*
00202     this goes like this:
00203     - first, if no state was given, make it be START
00204     - then if the state name is "*", add the new regexp to all the states
00205                                 (including *).
00206     - then, for each state in the list, add the new regexp; if the state is
00207                                 new, add also the regexp corresponding to
00208                                 "*").
00209   */
00210   vector<string> vs = tokStates;
00211   
00212   if (vs.size() == 0) vs.push_back("START");
00213   if (vs[0] == "*") {
00214     if (starRe != NULL)
00215       starRe = dynamic_cast<DfaSourceRe*>(nodeBuilder.createOrNode(starRe, regexp,
00216                                                                    Position()));
00217     else starRe = regexp;
00218     
00219     for (int i = 0; i < states.size(); i++)
00220       states[i].regexp =
00221         dynamic_cast<DfaSourceRe*>(nodeBuilder.createOrNode(states[i].regexp,
00222                                                             regexp->clone(),
00223                                                             Position()));
00224   } else {
00225     DfaSourceRe *tmp = regexp; // holds a new copy of regexp at each iteration
00226                              // the first one is the original itself
00227     for (int i = 0; i < vs.size(); i++)
00228     {
00229       vector<LexicalStateSpec>::iterator j;
00230       if (tmp == NULL) tmp = dynamic_cast<DfaSourceRe*>(regexp->clone());
00231       
00232       if ((j = find(states.begin(), states.end(), vs[i])) != states.end())
00233       { //is this state already added?
00234         (*j).regexp =
00235           dynamic_cast<DfaSourceRe*>(nodeBuilder.createOrNode((*j).regexp,
00236                                                               tmp,
00237                                                               Position()));
00238         tmp = NULL; // ask for a new copy of regexp
00239       } else 
00240       { //no, we have to create it and also add the regexp for * if any
00241         if (starRe != NULL)
00242           tmp =
00243             dynamic_cast<DfaSourceRe*>(nodeBuilder.createOrNode(starRe->clone(),
00244                                                                 tmp,
00245                                                                 Position()));
00246         states.push_back(LexicalStateSpec(vs[i], tmp));
00247         tmp = NULL;  // ask for a new copy of regexp
00248       }
00249     }
00250   }
00251 }
00252 
00253 
00254 #ifdef DEBUG
00255 
00256 void ScannerSpec::dump (ostream &os) const
00257 {
00258   os << "ScannerSpec duump: " << endl
00259      << "className: " << className << endl
00260      << "inheritance: " << inheritance.code << endl
00261      << "preambleCode:" << endl
00262      << preambleCode.code << endl
00263      << "----------------------------------------------------------" << endl
00264      << "userCode:" << endl;
00265   for (int i = 0; i < userCode.size(); i++)
00266     os << userCode[i].code;
00267   os << endl
00268      << "----------------------------------------------------------" << endl
00269      << "Tokens: " << endl;
00270   for (int i = 0; i < tokens.count(); i++)
00271     tokens[i].dump(os);
00272   os << "----------------------------------------------------------" << endl;
00273   os << "States:" << endl;
00274   for (int i = 0; i < states.size(); i++) {
00275     states[i].dump(os);
00276     os << "----------------------------------------------------------" << endl;
00277   }
00278 }
00279 
00280 void LexicalStateSpec::dump (ostream &os) const
00281 {
00282   os << "State: " << name << endl
00283      << "Regexp: " << endl;
00284   regexp->dump(os);
00285   os << endl;
00286 }
00287 
00288 
00289 #endif

Generated at Tue Jul 9 21:05:46 2002 for CppCC by doxygen1.2.8.1 written by Dimitri van Heesch, © 1997-2001