Main Page   Modules   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

basic_dfa_generator.cc

Go to the documentation of this file.
00001 /*
00002  *  File:       basic_dfa_generator.cc
00003  *              $Id: basic_dfa_generator.cc,v 1.7 2002/05/27 02:58:12 alec Exp $
00004  *
00005  *  Author:     Alec Panoviciu (alecu@email.com)
00006  * 
00007  *  Comments:
00008  *
00009  *  Revision history:
00010  *
00011  *  $Log: basic_dfa_generator.cc,v $
00012  *  Revision 1.7  2002/05/27 02:58:12  alec
00013  *  doc update
00014  *
00015  *  Revision 1.6  2002/05/22 01:31:42  alec
00016  *  cleaned up firstpo/followpos/nullable computation
00017  *
00018  *  Revision 1.5  2002/05/07 10:02:18  alec
00019  *  fixed some bugs & mem leaks; added MORE tokens support
00020  *
00021  *  Revision 1.4  2002/05/04 17:39:22  alec
00022  *  the scanner works (slightly tested)
00023  *
00024  *  Revision 1.3  2002/05/01 16:32:14  alec
00025  *  dfa ok. huh !
00026  *
00027  *  Revision 1.2  2002/05/01 09:18:26  alec
00028  *  - vBitset fixes
00029  *  - FOLLOWPOS written (not tested yet)
00030  *
00031  *  Revision 1.1  2002/04/30 16:26:38  alec
00032  *  my big endian will eat your little endian
00033  *
00034  */
00035 
00036 /*
00037   Copyright (C) 2002 Alexandru Panoviciu (alecu@email.com)
00038 
00039   This program is free software; you can redistribute it and/or modify
00040   it under the terms of the GNU General Public License as published by
00041   the Free Software Foundation; either version 2 of the License, or
00042   (at your option) any later version.
00043 
00044   This program is distributed in the hope that it will be useful,
00045   but WITHOUT ANY WARRANTY; without even the implied warranty of
00046   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00047   GNU General Public License for more details.
00048 
00049   You should have received a copy of the GNU General Public License
00050   along with this program; if not, write to the Free Software
00051   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00052 
00053  */
00054 
00055 #include <typeinfo>
00056 #include <queue>
00057 
00058 #include "prop_registry.hh"
00059 #include "scanner_spec.hh"
00060 #include "basic_dfa_generator.hh"
00061 #include "basic_dfa_spec.hh"
00062 #include "dfa_source_re.hh"
00063 #include "re_node.hh"
00064 #include "re_node_algo.hh"
00065 #include "itoken_spec.hh"
00066 
00067 BasicDfaGenerator::BasicDfaGenerator (ITokenSpec &tokens_,
00068                                       PropRegistry &registry_) :
00069   tokens(tokens_), registry(registry_)
00070 {}
00071 
00072 
00076 class LeafNumberGen : public ReNodeAlgo
00077 {
00078 public:
00079 
00080   LeafNumberGen () : counter(0) {}
00081 
00085   virtual bool operator () (ReNode &n)
00086   {
00087     ASSERT(&n != NULL, "Regexp tree corrupted !");
00088     
00089     if (typeid(n) == typeid(ReCharNode)) {
00090       ReCharNode &nn = dynamic_cast<ReCharNode&>(n);
00091       nn.pos = counter++;
00092     } else if (typeid(n) == typeid(ReEotNode)) {
00093       ReEotNode &nn = dynamic_cast<ReEotNode&>(n);
00094       nn.pos = counter++;
00095     } else if (typeid(n) == typeid(ReLambdaNode)) {
00096       ReLambdaNode &nn = dynamic_cast<ReLambdaNode&>(n);
00097       nn.pos = counter++;
00098     }
00099       
00100     return true;
00101   }
00102   
00103   unsigned int counter;
00104 };
00105 
00106 
00110 class FollowPosGen : public ReNodeAlgo
00111 {
00112 public:
00113   
00114   FollowPosGen (vBitset *table_) : table(table_) {}
00115 
00116   virtual bool operator () (ReNode &n)
00117   {
00118     if (typeid(n) == typeid(ReCatNode)) {
00119       ReCatNode &nn = dynamic_cast<ReCatNode&>(n);
00120       vBitset &preLast = nn.pre->lastPos;
00121       vBitset &postFirst = nn.post->firstPos;
00122       vBitset::reference ri = preLast.begin();
00123       for (unsigned int i = 0; ri.neq(preLast.end()); ri.next(), i++)
00124         if (ri.get())
00125           table[i] |= postFirst;
00126     } else if (typeid(n) == typeid(ReStarNode)) {
00127       ReStarNode &nn = dynamic_cast<ReStarNode&>(n);
00128       vBitset &nnLast = nn.lastPos;
00129       vBitset &nnFirst = nn.firstPos;
00130       vBitset::reference ri = nnLast.begin();
00131       for (unsigned int i = 0; ri.neq(nnLast.end()); ri.next(), i++)
00132         if (ri.get())
00133           table[i] |= nnFirst;
00134     }
00135 
00136     return true;
00137   }
00138   
00139   vBitset *table;
00140 };
00141 
00142 
00148 class LeafFinder : public ReNodeAlgo
00149 {
00150 public:
00151   
00152   LeafFinder (DfaSourceRe **leaves_) : leaves(leaves_)
00153   {}
00154 
00155   virtual bool operator () (ReNode &n)
00156   {
00157     ASSERT(&n != NULL, "Regexp tree corrupted !");
00158     
00159     if (typeid(n) == typeid(ReCharNode)) {
00160       ReCharNode &nn = dynamic_cast<ReCharNode&>(n);
00161       leaves[nn.pos] = &nn;
00162     } else if (typeid(n) == typeid(ReEotNode)) {
00163       ReEotNode &nn = dynamic_cast<ReEotNode&>(n);
00164       leaves[nn.pos] = &nn;
00165     } else if (typeid(n) == typeid(ReLambdaNode)) {
00166       ReLambdaNode &nn = dynamic_cast<ReLambdaNode&>(n);
00167       leaves[nn.pos] = &nn;
00168     }
00169     return true;
00170   }
00171 
00172   
00173   DfaSourceRe **leaves;
00174 };
00175 
00176 
00182 class AttributeGen : public ReNodeAlgo
00183 {
00184 public:
00185 
00186   virtual bool operator () (ReNode &n)
00187   {
00188     if (typeid(n) == typeid(ReCatNode)) {
00189       ReCatNode &nn = dynamic_cast<ReCatNode&>(n);
00190       nn.nullable = nn.pre->nullable && nn.post->nullable;
00191       nn.firstPos = nn.pre->firstPos;
00192       if (nn.pre->nullable) nn.firstPos |= nn.post->firstPos;
00193       nn.lastPos = nn.post->lastPos;
00194       if (nn.post->nullable) nn.lastPos |= nn.pre->lastPos;
00195     } else if (typeid(n) == typeid(ReOrNode)) {
00196       ReOrNode &nn = dynamic_cast<ReOrNode&>(n);
00197       nn.nullable = nn.pre->nullable || nn.post->nullable;
00198       nn.firstPos = nn.pre->firstPos;
00199       nn.firstPos |= nn.post->firstPos;
00200       nn.lastPos = nn.post->lastPos;
00201       nn.lastPos |= nn.pre->lastPos;
00202     } else if (typeid(n) == typeid(ReStarNode)) {
00203       ReStarNode &nn = dynamic_cast<ReStarNode&>(n);
00204       nn.nullable = true;
00205       nn.firstPos = nn.in->firstPos;
00206       nn.lastPos = nn.in->lastPos;
00207     } else if (typeid(n) == typeid(ReCharNode)) {
00208       ReCharNode &nn = dynamic_cast<ReCharNode&>(n);
00209       nn.nullable = false;
00210       nn.firstPos.resize(nn.pos + 1);
00211       nn.firstPos[nn.pos].set();
00212       nn.lastPos.resize(nn.pos + 1);
00213       nn.lastPos[nn.pos].set();
00214     } else if (typeid(n) == typeid(ReEotNode)) {
00215       ReEotNode &nn = dynamic_cast<ReEotNode&>(n);
00216       nn.nullable = true;
00217       nn.firstPos.resize(nn.pos + 1);
00218       nn.firstPos[nn.pos].set();
00219       nn.lastPos.resize(nn.pos + 1);
00220       nn.lastPos[nn.pos].set();
00221     } else if (typeid(n) == typeid(ReLambdaNode)) {
00222       ReLambdaNode &nn = dynamic_cast<ReLambdaNode&>(n);
00223       nn.nullable = true;
00224     } else ASSERT(0, "Bat typeid in AttributeGen::operator ()");
00225 
00226     return true;
00227   }
00228 };
00229 
00230 
00231 
00232 BasicDfaSpec& BasicDfaGenerator::createBasicDfa(const LexicalStateSpec &ls)
00233 {
00234   BasicDfaSpec &res = *new BasicDfaSpec(ls.name);
00235   DfaSourceRe *regexp = dynamic_cast<DfaSourceRe*>(ls.regexp);
00236 
00237   
00238   
00239   // 1. do the numbering
00240   LeafNumberGen nr;
00241   regexp->dfTraverse(nr);
00242 
00243   // This vector tells us where leave nodes are. At index i, we have a a pointer
00244   // to leaf node with pos == i
00245   DfaSourceRe **leaves = new (DfaSourceRe*)[nr.counter];
00246   LeafFinder lf(leaves);
00247   regexp->dfTraverse(lf);
00248   
00249   
00250   // 2. compute attributes
00251   AttributeGen aga;
00252   regexp->dfTraverse(aga);
00253   
00254 #ifdef DEBUG
00255   if (((string)registry["debug"]).find('f') != string::npos) {
00256     cerr << "-------------------------------------------------------------"
00257          << endl << "STATE NODES DUMP FOR STATE " << ls.name << endl;
00258     regexp->dump(cerr);
00259   }
00260 #endif
00261 
00262   // 3. compute the followpos
00263   vBitset *followPos = new vBitset[nr.counter];
00264   FollowPosGen fpg(followPos);
00265   regexp->dfTraverse(fpg);
00266 
00267 #ifdef DEBUG
00268   if (((string)registry["debug"]).find('f') != string::npos) {
00269     cerr << "-------------------------------------------------------------"
00270          << endl << "FOLLOWPOS table:" << endl;
00271     for (unsigned int i = 0; i < nr.counter; i++) {
00272       cerr << i << ": ";
00273       vBitset::reference ri = followPos[i].begin();
00274       for (unsigned int j = 0; ri.neq(followPos[i].end()); ri.next(), j++)
00275         if (ri.get()) cerr << j << " ";
00276       cerr << endl;
00277     }
00278   }
00279 #endif
00280 
00281   // the bitvector associated to each state
00282   vector<vBitset*> statesDesc;
00283 
00284   // states for which transitions have not been computed yet.
00285   // the number stored is the index into statesDesc where the state is described
00286   queue<int> q;
00287 
00288   // create the initial state
00289   statesDesc.push_back(new vBitset(regexp->firstPos));
00290   res.states.push_back(BasicDfaSpec::State());
00291   q.push(0); 
00292 
00293   while (!q.empty())
00294   {
00295     int Dnum = q.front(); q.pop();
00296     vBitset &Dset = *statesDesc[Dnum];
00297     
00298     for (unsigned int c = 0; c < 256; c++) {
00299       vBitset *Uset = new vBitset();
00300       
00301       vBitset::reference pi = Dset.begin();
00302       for (unsigned int p = 0; pi.neq(Dset.end()); p++, pi.next())
00303         if (pi.get() && (typeid(*leaves[p]) == typeid(ReCharNode))) {
00304           ReCharNode &n = dynamic_cast<ReCharNode&>(*leaves[p]);
00305           if (n.match == c)  // node n at position p matches c, then add
00306                               // followPos(n) into Udesc
00307             *Uset |= followPos[p];
00308         }
00309 
00310       if (Uset->all0()) {
00311         delete Uset;
00312         continue; //if U is empty, continue with next c
00313       }
00314       
00315       int Unum = 0;
00316 
00317       for (; Unum < statesDesc.size(); Unum++)
00318         if (*statesDesc[Unum] == *Uset) break;
00319       
00320       if (Unum == statesDesc.size()) {
00321         // U is a new state
00322         statesDesc.push_back(Uset);
00323         res.states.push_back(BasicDfaSpec::State());
00324         q.push(Unum);
00325       } else
00326         delete Uset;
00327       
00328       res.states[Dnum].addTransition(c, Unum);
00329       
00330     }
00331 
00332   } // while !w.empty()
00333 
00334 
00335   // determine and mark as such the accepting states
00336   // a state is accepting state if at least one of the position that describes
00337   // it is that of an EOT leaf node
00338   for (int Snum = 0; Snum < statesDesc.size(); Snum++) {
00339     vBitset::reference pi = statesDesc[Snum]->begin();
00340     for (unsigned int i = 0; pi.neq(statesDesc[Snum]->end()); i++, pi.next())
00341       if (pi.get() && (typeid(*leaves[i]) == typeid(ReEotNode))) {
00342         ReEotNode &n = dynamic_cast<ReEotNode&>(*leaves[i]);
00343         // this warning is damn annoying !
00344         if (res.states[Snum].isFinal) {
00345           /*  cerr << formatWarning(leaves[i]->getPos(), string("Regexp for token ") +
00346                                 tokens[res.states[Snum].tokId].name() +
00347                                 " hides regexp for token "
00348                                 + tokens[n.tokId].name() +
00349                                 " which might not get matched while in lexical state " +
00350                                 ls.name) << endl;*/
00351           continue;
00352         }
00353         res.states[Snum].isFinal = true;
00354         res.states[Snum].tokId = n.tokId;
00355       }
00356   }
00357  
00358   
00359 #ifdef DEBUG
00360   if (((string)registry["debug"]).find('f') != string::npos) {
00361     cerr << "----------------------------------------------------" << endl
00362          << "DFA dump:" << endl;
00363     res.dump(cerr);
00364   }
00365 #endif
00366 
00367   for (int i = 0; i < statesDesc.size(); i++) delete statesDesc[i];
00368   delete[] followPos;
00369   delete[] leaves;
00370   
00371   return res;
00372 }

Generated at Tue Jul 9 21:05:43 2002 for CppCC by doxygen1.2.8.1 written by Dimitri van Heesch, © 1997-2001