00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054 #include <algorithm>
00055
00056 #include "debug.h"
00057 #include "scanner_spec.hh"
00058 #include "dfa_re_node_builder.hh"
00059 #include "itoken_spec.hh"
00060
00061 ScannerSpec::ScannerSpec (ITokenSpec &tokens_, PropRegistry ®istry) :
00062 nodeBuilder(*new DfaReNodeBuilder(registry)),
00063 tokens(tokens_),
00064 starRe(NULL)
00065 {
00066
00067 tokens.addToken("eof", Position());
00068 }
00069
00070
00071 ScannerSpec::~ScannerSpec ()
00072 {
00073 delete &nodeBuilder;
00074 }
00075
00076
00077 IReNodeBuilder& ScannerSpec::getReNodeBuilder ()
00078 {
00079 return nodeBuilder;
00080 }
00081
00082
00083 void ScannerSpec::addRegToken (const vector<string> &states,
00084 const string &name, ReNode *regexp,
00085 const string &tokenAction,
00086 const Position &pos)
00087 throw (ParseException)
00088 {
00089 if (tokens.isToken(name))
00090 throw ParseException(pos, "Duplicate token name.");
00091
00092 regexp = nodeBuilder.createCatNode(regexp,
00093 nodeBuilder.createEotNode(tokens.count(),
00094 pos),
00095 pos);
00096 tokens.addToken(name, ITokenSpec::regular, regexp, tokenAction, pos);
00097 updateStates(states, dynamic_cast<DfaSourceRe*>(regexp));
00098 }
00099
00100
00101 void ScannerSpec::addSkipToken (const vector<string> &states,
00102 const string &name, ReNode *regexp,
00103 const string &tokenAction,
00104 const Position &pos)
00105 throw (ParseException)
00106 {
00107 if (tokens.isToken(name))
00108 throw ParseException(pos, "Duplicate token name.");
00109
00110 regexp = nodeBuilder.createCatNode(regexp,
00111 nodeBuilder.createEotNode(tokens.count(), pos),
00112 pos);
00113 tokens.addToken(name, ITokenSpec::skip, regexp, tokenAction, pos);
00114
00115 updateStates(states, dynamic_cast<DfaSourceRe*>(regexp));
00116 }
00117
00118
00119 void ScannerSpec::addMoreToken (const vector<string> &states,
00120 const string &name, ReNode *regexp,
00121 const string &tokenAction,
00122 const Position &pos)
00123 throw (ParseException)
00124 {
00125 if (tokens.isToken(name))
00126 throw ParseException(pos, "Duplicate token name.");
00127
00128 regexp = nodeBuilder.createCatNode(regexp,
00129 nodeBuilder.createEotNode(tokens.count(), pos),
00130 pos);
00131 tokens.addToken(name, ITokenSpec::more, regexp, tokenAction, pos);
00132
00133 updateStates(states, dynamic_cast<DfaSourceRe*>(regexp));
00134 }
00135
00136
00137 void ScannerSpec::addKeywordToken (const vector<string> &states,
00138 const string &name, ReNode *regexp,
00139 const string &tokenAction,
00140 const Position &pos)
00141 throw (ParseException)
00142 {
00143 if (tokens.isToken(name))
00144 throw ParseException(pos, "Duplicate token name.");
00145
00146 regexp = nodeBuilder.createCatNode(regexp,
00147 nodeBuilder.createEotNode(tokens.count(), pos),
00148 pos);
00149 tokens.addToken(name, ITokenSpec::keyword, regexp, tokenAction, pos);
00150
00151 updateStates(states, dynamic_cast<DfaSourceRe*>(regexp));
00152 }
00153
00154
00155 void ScannerSpec::addSpecialToken (const string &name, const Position &pos)
00156 throw (ParseException)
00157 {
00158 if (tokens.isToken(name))
00159 throw ParseException(pos, "Duplicate token name.");
00160
00161 tokens.addToken(name, pos);
00162 }
00163
00164
00165 void ScannerSpec::setPreambleCode (const string &block, const Position &pos)
00166 {
00167 preambleCode.code = block;
00168 preambleCode.pos = pos;
00169 }
00170
00171
00172 void ScannerSpec::addCodeBlock (const string &block, const Position &pos)
00173 {
00174 userCode.push_back(CodeChunk(pos, block));
00175 }
00176
00177
00178 void ScannerSpec::setInheritance (const string &inheritance_,
00179 const Position &pos)
00180 {
00181 inheritance.pos = pos;
00182 inheritance.code = inheritance_;
00183 }
00184
00185
00186 void ScannerSpec::setClassName (const string &name_)
00187 {
00188 className = name_;
00189 }
00190
00191
00192 bool operator == (const LexicalStateSpec &st, const string &s)
00193 {
00194 return st.name == s;
00195 }
00196
00197
00198 void ScannerSpec::updateStates (const vector<string> &tokStates,
00199 DfaSourceRe *regexp)
00200 {
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210 vector<string> vs = tokStates;
00211
00212 if (vs.size() == 0) vs.push_back("START");
00213 if (vs[0] == "*") {
00214 if (starRe != NULL)
00215 starRe = dynamic_cast<DfaSourceRe*>(nodeBuilder.createOrNode(starRe, regexp,
00216 Position()));
00217 else starRe = regexp;
00218
00219 for (int i = 0; i < states.size(); i++)
00220 states[i].regexp =
00221 dynamic_cast<DfaSourceRe*>(nodeBuilder.createOrNode(states[i].regexp,
00222 regexp->clone(),
00223 Position()));
00224 } else {
00225 DfaSourceRe *tmp = regexp;
00226
00227 for (int i = 0; i < vs.size(); i++)
00228 {
00229 vector<LexicalStateSpec>::iterator j;
00230 if (tmp == NULL) tmp = dynamic_cast<DfaSourceRe*>(regexp->clone());
00231
00232 if ((j = find(states.begin(), states.end(), vs[i])) != states.end())
00233 {
00234 (*j).regexp =
00235 dynamic_cast<DfaSourceRe*>(nodeBuilder.createOrNode((*j).regexp,
00236 tmp,
00237 Position()));
00238 tmp = NULL;
00239 } else
00240 {
00241 if (starRe != NULL)
00242 tmp =
00243 dynamic_cast<DfaSourceRe*>(nodeBuilder.createOrNode(starRe->clone(),
00244 tmp,
00245 Position()));
00246 states.push_back(LexicalStateSpec(vs[i], tmp));
00247 tmp = NULL;
00248 }
00249 }
00250 }
00251 }
00252
00253
00254 #ifdef DEBUG
00255
00256 void ScannerSpec::dump (ostream &os) const
00257 {
00258 os << "ScannerSpec duump: " << endl
00259 << "className: " << className << endl
00260 << "inheritance: " << inheritance.code << endl
00261 << "preambleCode:" << endl
00262 << preambleCode.code << endl
00263 << "----------------------------------------------------------" << endl
00264 << "userCode:" << endl;
00265 for (int i = 0; i < userCode.size(); i++)
00266 os << userCode[i].code;
00267 os << endl
00268 << "----------------------------------------------------------" << endl
00269 << "Tokens: " << endl;
00270 for (int i = 0; i < tokens.count(); i++)
00271 tokens[i].dump(os);
00272 os << "----------------------------------------------------------" << endl;
00273 os << "States:" << endl;
00274 for (int i = 0; i < states.size(); i++) {
00275 states[i].dump(os);
00276 os << "----------------------------------------------------------" << endl;
00277 }
00278 }
00279
00280 void LexicalStateSpec::dump (ostream &os) const
00281 {
00282 os << "State: " << name << endl
00283 << "Regexp: " << endl;
00284 regexp->dump(os);
00285 os << endl;
00286 }
00287
00288
00289 #endif