00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055 #include <typeinfo>
00056 #include <queue>
00057
00058 #include "prop_registry.hh"
00059 #include "scanner_spec.hh"
00060 #include "basic_dfa_generator.hh"
00061 #include "basic_dfa_spec.hh"
00062 #include "dfa_source_re.hh"
00063 #include "re_node.hh"
00064 #include "re_node_algo.hh"
00065 #include "itoken_spec.hh"
00066
00067 BasicDfaGenerator::BasicDfaGenerator (ITokenSpec &tokens_,
00068 PropRegistry ®istry_) :
00069 tokens(tokens_), registry(registry_)
00070 {}
00071
00072
00076 class LeafNumberGen : public ReNodeAlgo
00077 {
00078 public:
00079
00080 LeafNumberGen () : counter(0) {}
00081
00085 virtual bool operator () (ReNode &n)
00086 {
00087 ASSERT(&n != NULL, "Regexp tree corrupted !");
00088
00089 if (typeid(n) == typeid(ReCharNode)) {
00090 ReCharNode &nn = dynamic_cast<ReCharNode&>(n);
00091 nn.pos = counter++;
00092 } else if (typeid(n) == typeid(ReEotNode)) {
00093 ReEotNode &nn = dynamic_cast<ReEotNode&>(n);
00094 nn.pos = counter++;
00095 } else if (typeid(n) == typeid(ReLambdaNode)) {
00096 ReLambdaNode &nn = dynamic_cast<ReLambdaNode&>(n);
00097 nn.pos = counter++;
00098 }
00099
00100 return true;
00101 }
00102
00103 unsigned int counter;
00104 };
00105
00106
00110 class FollowPosGen : public ReNodeAlgo
00111 {
00112 public:
00113
00114 FollowPosGen (vBitset *table_) : table(table_) {}
00115
00116 virtual bool operator () (ReNode &n)
00117 {
00118 if (typeid(n) == typeid(ReCatNode)) {
00119 ReCatNode &nn = dynamic_cast<ReCatNode&>(n);
00120 vBitset &preLast = nn.pre->lastPos;
00121 vBitset &postFirst = nn.post->firstPos;
00122 vBitset::reference ri = preLast.begin();
00123 for (unsigned int i = 0; ri.neq(preLast.end()); ri.next(), i++)
00124 if (ri.get())
00125 table[i] |= postFirst;
00126 } else if (typeid(n) == typeid(ReStarNode)) {
00127 ReStarNode &nn = dynamic_cast<ReStarNode&>(n);
00128 vBitset &nnLast = nn.lastPos;
00129 vBitset &nnFirst = nn.firstPos;
00130 vBitset::reference ri = nnLast.begin();
00131 for (unsigned int i = 0; ri.neq(nnLast.end()); ri.next(), i++)
00132 if (ri.get())
00133 table[i] |= nnFirst;
00134 }
00135
00136 return true;
00137 }
00138
00139 vBitset *table;
00140 };
00141
00142
00148 class LeafFinder : public ReNodeAlgo
00149 {
00150 public:
00151
00152 LeafFinder (DfaSourceRe **leaves_) : leaves(leaves_)
00153 {}
00154
00155 virtual bool operator () (ReNode &n)
00156 {
00157 ASSERT(&n != NULL, "Regexp tree corrupted !");
00158
00159 if (typeid(n) == typeid(ReCharNode)) {
00160 ReCharNode &nn = dynamic_cast<ReCharNode&>(n);
00161 leaves[nn.pos] = &nn;
00162 } else if (typeid(n) == typeid(ReEotNode)) {
00163 ReEotNode &nn = dynamic_cast<ReEotNode&>(n);
00164 leaves[nn.pos] = &nn;
00165 } else if (typeid(n) == typeid(ReLambdaNode)) {
00166 ReLambdaNode &nn = dynamic_cast<ReLambdaNode&>(n);
00167 leaves[nn.pos] = &nn;
00168 }
00169 return true;
00170 }
00171
00172
00173 DfaSourceRe **leaves;
00174 };
00175
00176
00182 class AttributeGen : public ReNodeAlgo
00183 {
00184 public:
00185
00186 virtual bool operator () (ReNode &n)
00187 {
00188 if (typeid(n) == typeid(ReCatNode)) {
00189 ReCatNode &nn = dynamic_cast<ReCatNode&>(n);
00190 nn.nullable = nn.pre->nullable && nn.post->nullable;
00191 nn.firstPos = nn.pre->firstPos;
00192 if (nn.pre->nullable) nn.firstPos |= nn.post->firstPos;
00193 nn.lastPos = nn.post->lastPos;
00194 if (nn.post->nullable) nn.lastPos |= nn.pre->lastPos;
00195 } else if (typeid(n) == typeid(ReOrNode)) {
00196 ReOrNode &nn = dynamic_cast<ReOrNode&>(n);
00197 nn.nullable = nn.pre->nullable || nn.post->nullable;
00198 nn.firstPos = nn.pre->firstPos;
00199 nn.firstPos |= nn.post->firstPos;
00200 nn.lastPos = nn.post->lastPos;
00201 nn.lastPos |= nn.pre->lastPos;
00202 } else if (typeid(n) == typeid(ReStarNode)) {
00203 ReStarNode &nn = dynamic_cast<ReStarNode&>(n);
00204 nn.nullable = true;
00205 nn.firstPos = nn.in->firstPos;
00206 nn.lastPos = nn.in->lastPos;
00207 } else if (typeid(n) == typeid(ReCharNode)) {
00208 ReCharNode &nn = dynamic_cast<ReCharNode&>(n);
00209 nn.nullable = false;
00210 nn.firstPos.resize(nn.pos + 1);
00211 nn.firstPos[nn.pos].set();
00212 nn.lastPos.resize(nn.pos + 1);
00213 nn.lastPos[nn.pos].set();
00214 } else if (typeid(n) == typeid(ReEotNode)) {
00215 ReEotNode &nn = dynamic_cast<ReEotNode&>(n);
00216 nn.nullable = true;
00217 nn.firstPos.resize(nn.pos + 1);
00218 nn.firstPos[nn.pos].set();
00219 nn.lastPos.resize(nn.pos + 1);
00220 nn.lastPos[nn.pos].set();
00221 } else if (typeid(n) == typeid(ReLambdaNode)) {
00222 ReLambdaNode &nn = dynamic_cast<ReLambdaNode&>(n);
00223 nn.nullable = true;
00224 } else ASSERT(0, "Bat typeid in AttributeGen::operator ()");
00225
00226 return true;
00227 }
00228 };
00229
00230
00231
00232 BasicDfaSpec& BasicDfaGenerator::createBasicDfa(const LexicalStateSpec &ls)
00233 {
00234 BasicDfaSpec &res = *new BasicDfaSpec(ls.name);
00235 DfaSourceRe *regexp = dynamic_cast<DfaSourceRe*>(ls.regexp);
00236
00237
00238
00239
00240 LeafNumberGen nr;
00241 regexp->dfTraverse(nr);
00242
00243
00244
00245 DfaSourceRe **leaves = new (DfaSourceRe*)[nr.counter];
00246 LeafFinder lf(leaves);
00247 regexp->dfTraverse(lf);
00248
00249
00250
00251 AttributeGen aga;
00252 regexp->dfTraverse(aga);
00253
00254 #ifdef DEBUG
00255 if (((string)registry["debug"]).find('f') != string::npos) {
00256 cerr << "-------------------------------------------------------------"
00257 << endl << "STATE NODES DUMP FOR STATE " << ls.name << endl;
00258 regexp->dump(cerr);
00259 }
00260 #endif
00261
00262
00263 vBitset *followPos = new vBitset[nr.counter];
00264 FollowPosGen fpg(followPos);
00265 regexp->dfTraverse(fpg);
00266
00267 #ifdef DEBUG
00268 if (((string)registry["debug"]).find('f') != string::npos) {
00269 cerr << "-------------------------------------------------------------"
00270 << endl << "FOLLOWPOS table:" << endl;
00271 for (unsigned int i = 0; i < nr.counter; i++) {
00272 cerr << i << ": ";
00273 vBitset::reference ri = followPos[i].begin();
00274 for (unsigned int j = 0; ri.neq(followPos[i].end()); ri.next(), j++)
00275 if (ri.get()) cerr << j << " ";
00276 cerr << endl;
00277 }
00278 }
00279 #endif
00280
00281
00282 vector<vBitset*> statesDesc;
00283
00284
00285
00286 queue<int> q;
00287
00288
00289 statesDesc.push_back(new vBitset(regexp->firstPos));
00290 res.states.push_back(BasicDfaSpec::State());
00291 q.push(0);
00292
00293 while (!q.empty())
00294 {
00295 int Dnum = q.front(); q.pop();
00296 vBitset &Dset = *statesDesc[Dnum];
00297
00298 for (unsigned int c = 0; c < 256; c++) {
00299 vBitset *Uset = new vBitset();
00300
00301 vBitset::reference pi = Dset.begin();
00302 for (unsigned int p = 0; pi.neq(Dset.end()); p++, pi.next())
00303 if (pi.get() && (typeid(*leaves[p]) == typeid(ReCharNode))) {
00304 ReCharNode &n = dynamic_cast<ReCharNode&>(*leaves[p]);
00305 if (n.match == c)
00306
00307 *Uset |= followPos[p];
00308 }
00309
00310 if (Uset->all0()) {
00311 delete Uset;
00312 continue;
00313 }
00314
00315 int Unum = 0;
00316
00317 for (; Unum < statesDesc.size(); Unum++)
00318 if (*statesDesc[Unum] == *Uset) break;
00319
00320 if (Unum == statesDesc.size()) {
00321
00322 statesDesc.push_back(Uset);
00323 res.states.push_back(BasicDfaSpec::State());
00324 q.push(Unum);
00325 } else
00326 delete Uset;
00327
00328 res.states[Dnum].addTransition(c, Unum);
00329
00330 }
00331
00332 }
00333
00334
00335
00336
00337
00338 for (int Snum = 0; Snum < statesDesc.size(); Snum++) {
00339 vBitset::reference pi = statesDesc[Snum]->begin();
00340 for (unsigned int i = 0; pi.neq(statesDesc[Snum]->end()); i++, pi.next())
00341 if (pi.get() && (typeid(*leaves[i]) == typeid(ReEotNode))) {
00342 ReEotNode &n = dynamic_cast<ReEotNode&>(*leaves[i]);
00343
00344 if (res.states[Snum].isFinal) {
00345
00346
00347
00348
00349
00350
00351 continue;
00352 }
00353 res.states[Snum].isFinal = true;
00354 res.states[Snum].tokId = n.tokId;
00355 }
00356 }
00357
00358
00359 #ifdef DEBUG
00360 if (((string)registry["debug"]).find('f') != string::npos) {
00361 cerr << "----------------------------------------------------" << endl
00362 << "DFA dump:" << endl;
00363 res.dump(cerr);
00364 }
00365 #endif
00366
00367 for (int i = 0; i < statesDesc.size(); i++) delete statesDesc[i];
00368 delete[] followPos;
00369 delete[] leaves;
00370
00371 return res;
00372 }