00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
00039 
00040 
00041 
00042 
00043 
00044 
00045 
00046 
00047 
00048 
00049 
00050 
00051 
00052 
00053 
00054 #include <algorithm>
00055 
00056 #include "debug.h"
00057 #include "scanner_spec.hh"
00058 #include "dfa_re_node_builder.hh"
00059 #include "itoken_spec.hh"
00060 
00061 ScannerSpec::ScannerSpec (ITokenSpec &tokens_, PropRegistry ®istry) :
00062   nodeBuilder(*new DfaReNodeBuilder(registry)),
00063   tokens(tokens_),
00064   starRe(NULL)
00065 {
00066   
00067   tokens.addToken("eof", Position());
00068 }
00069 
00070 
00071 ScannerSpec::~ScannerSpec ()
00072 {
00073   delete &nodeBuilder;
00074 }
00075 
00076 
00077 IReNodeBuilder& ScannerSpec::getReNodeBuilder ()
00078 {
00079   return nodeBuilder;
00080 }
00081 
00082 
00083 void ScannerSpec::addRegToken (const vector<string> &states,
00084                                const string &name, ReNode *regexp,
00085                                const string &tokenAction,
00086                                const Position &pos)
00087   throw (ParseException)
00088 {
00089   if (tokens.isToken(name))
00090     throw ParseException(pos, "Duplicate token name.");
00091 
00092   regexp = nodeBuilder.createCatNode(regexp,
00093                                      nodeBuilder.createEotNode(tokens.count(),
00094                                                                pos),
00095                                      pos);
00096   tokens.addToken(name, ITokenSpec::regular, regexp, tokenAction, pos);
00097   updateStates(states, dynamic_cast<DfaSourceRe*>(regexp));
00098 }
00099 
00100 
00101 void ScannerSpec::addSkipToken (const vector<string> &states,
00102                                 const string &name, ReNode *regexp,
00103                                 const string &tokenAction,
00104                                 const Position &pos)
00105   throw (ParseException)
00106 {
00107   if (tokens.isToken(name))
00108     throw ParseException(pos, "Duplicate token name.");
00109 
00110   regexp = nodeBuilder.createCatNode(regexp,
00111                                nodeBuilder.createEotNode(tokens.count(), pos),
00112                                      pos);
00113   tokens.addToken(name, ITokenSpec::skip, regexp, tokenAction, pos);
00114 
00115   updateStates(states, dynamic_cast<DfaSourceRe*>(regexp));
00116 }
00117 
00118 
00119 void ScannerSpec::addMoreToken (const vector<string> &states,
00120                                 const string &name, ReNode *regexp,
00121                                 const string &tokenAction,
00122                                 const Position &pos)
00123   throw (ParseException)
00124 {
00125   if (tokens.isToken(name))
00126     throw ParseException(pos, "Duplicate token name.");
00127 
00128   regexp = nodeBuilder.createCatNode(regexp,
00129                                nodeBuilder.createEotNode(tokens.count(), pos),
00130                                      pos);
00131   tokens.addToken(name, ITokenSpec::more, regexp, tokenAction, pos);
00132 
00133   updateStates(states, dynamic_cast<DfaSourceRe*>(regexp));
00134 }
00135 
00136 
00137 void ScannerSpec::addKeywordToken (const vector<string> &states,
00138                                    const string &name, ReNode *regexp,
00139                                    const string &tokenAction,
00140                                    const Position &pos)
00141   throw (ParseException)
00142 {
00143   if (tokens.isToken(name))
00144     throw ParseException(pos, "Duplicate token name.");
00145 
00146   regexp = nodeBuilder.createCatNode(regexp,
00147                                nodeBuilder.createEotNode(tokens.count(), pos),
00148                                      pos);
00149   tokens.addToken(name, ITokenSpec::keyword, regexp, tokenAction, pos);
00150 
00151   updateStates(states, dynamic_cast<DfaSourceRe*>(regexp));
00152 }
00153 
00154   
00155 void ScannerSpec::addSpecialToken (const string &name, const Position &pos)
00156   throw (ParseException)
00157 {
00158   if (tokens.isToken(name))
00159     throw ParseException(pos, "Duplicate token name.");
00160 
00161   tokens.addToken(name, pos);
00162 }
00163 
00164 
00165 void ScannerSpec::setPreambleCode (const string &block, const Position &pos)
00166 {
00167   preambleCode.code = block;
00168   preambleCode.pos = pos;
00169 }
00170 
00171 
00172 void ScannerSpec::addCodeBlock (const string &block, const Position &pos)
00173 {
00174   userCode.push_back(CodeChunk(pos, block));
00175 }
00176 
00177 
00178 void ScannerSpec::setInheritance (const string &inheritance_,
00179                                   const Position &pos)
00180 {
00181   inheritance.pos = pos;
00182   inheritance.code = inheritance_;
00183 }
00184 
00185 
00186 void ScannerSpec::setClassName (const string &name_)
00187 {
00188   className = name_;
00189 }
00190 
00191 
00192 bool operator == (const LexicalStateSpec &st, const string &s)
00193 {
00194   return st.name == s;
00195 }
00196 
00197 
00198 void ScannerSpec::updateStates (const vector<string> &tokStates,
00199                                 DfaSourceRe *regexp)
00200 {
00201   
00202 
00203 
00204 
00205 
00206 
00207 
00208 
00209 
00210   vector<string> vs = tokStates;
00211   
00212   if (vs.size() == 0) vs.push_back("START");
00213   if (vs[0] == "*") {
00214     if (starRe != NULL)
00215       starRe = dynamic_cast<DfaSourceRe*>(nodeBuilder.createOrNode(starRe, regexp,
00216                                                                    Position()));
00217     else starRe = regexp;
00218     
00219     for (int i = 0; i < states.size(); i++)
00220       states[i].regexp =
00221         dynamic_cast<DfaSourceRe*>(nodeBuilder.createOrNode(states[i].regexp,
00222                                                             regexp->clone(),
00223                                                             Position()));
00224   } else {
00225     DfaSourceRe *tmp = regexp; 
00226                              
00227     for (int i = 0; i < vs.size(); i++)
00228     {
00229       vector<LexicalStateSpec>::iterator j;
00230       if (tmp == NULL) tmp = dynamic_cast<DfaSourceRe*>(regexp->clone());
00231       
00232       if ((j = find(states.begin(), states.end(), vs[i])) != states.end())
00233       { 
00234         (*j).regexp =
00235           dynamic_cast<DfaSourceRe*>(nodeBuilder.createOrNode((*j).regexp,
00236                                                               tmp,
00237                                                               Position()));
00238         tmp = NULL; 
00239       } else 
00240       { 
00241         if (starRe != NULL)
00242           tmp =
00243             dynamic_cast<DfaSourceRe*>(nodeBuilder.createOrNode(starRe->clone(),
00244                                                                 tmp,
00245                                                                 Position()));
00246         states.push_back(LexicalStateSpec(vs[i], tmp));
00247         tmp = NULL;  
00248       }
00249     }
00250   }
00251 }
00252 
00253 
00254 #ifdef DEBUG
00255 
00256 void ScannerSpec::dump (ostream &os) const
00257 {
00258   os << "ScannerSpec duump: " << endl
00259      << "className: " << className << endl
00260      << "inheritance: " << inheritance.code << endl
00261      << "preambleCode:" << endl
00262      << preambleCode.code << endl
00263      << "----------------------------------------------------------" << endl
00264      << "userCode:" << endl;
00265   for (int i = 0; i < userCode.size(); i++)
00266     os << userCode[i].code;
00267   os << endl
00268      << "----------------------------------------------------------" << endl
00269      << "Tokens: " << endl;
00270   for (int i = 0; i < tokens.count(); i++)
00271     tokens[i].dump(os);
00272   os << "----------------------------------------------------------" << endl;
00273   os << "States:" << endl;
00274   for (int i = 0; i < states.size(); i++) {
00275     states[i].dump(os);
00276     os << "----------------------------------------------------------" << endl;
00277   }
00278 }
00279 
00280 void LexicalStateSpec::dump (ostream &os) const
00281 {
00282   os << "State: " << name << endl
00283      << "Regexp: " << endl;
00284   regexp->dump(os);
00285   os << endl;
00286 }
00287 
00288 
00289 #endif