00001 #ifndef INC_CharScanner_hpp__
00002 #define INC_CharScanner_hpp__
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <antlr/config.hpp>
00012
00013 #include <map>
00014 #include <strings.h>
00015 #include <cstdio>
00016
00017 #ifdef HAS_NOT_CCTYPE_H
00018 #include <ctype.h>
00019 #else
00020 #include <cctype>
00021 #endif
00022
00023 #if ( _MSC_VER == 1200 )
00024
00025
00026 # include <stdio.h>
00027 #endif
00028
00029 #include <antlr/TokenStream.hpp>
00030 #include <antlr/RecognitionException.hpp>
00031 #include <antlr/SemanticException.hpp>
00032 #include <antlr/MismatchedCharException.hpp>
00033 #include <antlr/InputBuffer.hpp>
00034 #include <antlr/BitSet.hpp>
00035 #include <antlr/LexerSharedInputState.hpp>
00036
00037 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
00038 namespace antlr {
00039 #endif
00040
00041 class ANTLR_API CharScanner;
00042
00043 ANTLR_C_USING(tolower)
00044
00045 #ifdef ANTLR_REALLY_NO_STRCASECMP
00046
00047
/** Fallback case-insensitive comparison of two C strings, compiled only
 * when ANTLR_REALLY_NO_STRCASECMP is defined.
 *
 * Both strings are walked in lock-step; every byte is folded with
 * tolower() and the first differing pair decides the result.
 *
 * Each byte is converted to unsigned char before it is handed to
 * tolower(): passing a negative plain char is undefined behaviour, and
 * comparing the folded values as non-negative ints gives bytes >= 0x80
 * the same ordering POSIX strcasecmp() uses (after ASCII, not before it).
 *
 * @param s1 first NUL-terminated string, must not be null
 * @param s2 second NUL-terminated string, must not be null
 * @return -1 if s1 orders before s2, 1 if it orders after, 0 if both are
 *         equal when case is ignored
 */
inline int strcasecmp(const char *s1, const char *s2)
{
	for (;;)
	{
		const int c1 = tolower(static_cast<unsigned char>(*s1++));
		const int c2 = tolower(static_cast<unsigned char>(*s2++));

		if (c1 < c2) return -1;
		if (c1 > c2) return 1;
		// equal bytes: a shared terminator means the strings matched
		if (c1 == 0) return 0;
	}
}
00059 #else
00060 #ifdef NO_STRCASECMP
00061 ANTLR_C_USING(stricmp)
00062 #else
00063 ANTLR_C_USING(strcasecmp)
00064 #endif
00065 #endif
00066
00069 class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
00070 private:
00071 const CharScanner* scanner;
00072 public:
00073 #ifdef NO_TEMPLATE_PARTS
00074 CharScannerLiteralsLess() {}
00075 #endif
00076 CharScannerLiteralsLess(const CharScanner* theScanner)
00077 : scanner(theScanner)
00078 {
00079 }
00080 bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
00081
00082
00083
00084 };
00085
00088 class ANTLR_API CharScanner : public TokenStream {
00089 protected:
00090 typedef RefToken (*factory_type)();
00091 public:
00092 CharScanner(InputBuffer& cb, bool case_sensitive );
00093 CharScanner(InputBuffer* cb, bool case_sensitive );
00094 CharScanner(const LexerSharedInputState& state, bool case_sensitive );
00095
00096 virtual ~CharScanner()
00097 {
00098 }
00099
00100 virtual int LA(unsigned int i);
00101
00102 virtual void append(char c)
00103 {
00104 if (saveConsumedInput)
00105 {
00106 size_t l = text.length();
00107
00108 if ((l%256) == 0)
00109 text.reserve(l+256);
00110
00111 text.replace(l,0,&c,1);
00112 }
00113 }
00114
00115 virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
00116 {
00117 if( saveConsumedInput )
00118 text += s;
00119 }
00120
00121 virtual void commit()
00122 {
00123 inputState->getInput().commit();
00124 }
00125
00129 virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
00130 {
00131 consume();
00132 consumeUntil(tokenSet);
00133 }
00134
00135 virtual void consume()
00136 {
00137 if (inputState->guessing == 0)
00138 {
00139 int c = LA(1);
00140 if (caseSensitive)
00141 {
00142 append(c);
00143 }
00144 else
00145 {
00146
00147
00148 append(inputState->getInput().LA(1));
00149 }
00150
00151
00152 if (c == '\t')
00153 tab();
00154 else
00155 inputState->column++;
00156 }
00157 inputState->getInput().consume();
00158 }
00159
00161 virtual void consumeUntil(int c)
00162 {
00163 for(;;)
00164 {
00165 int la_1 = LA(1);
00166 if( la_1 == EOF_CHAR || la_1 == c )
00167 break;
00168 consume();
00169 }
00170 }
00171
00173 virtual void consumeUntil(const BitSet& set)
00174 {
00175 for(;;)
00176 {
00177 int la_1 = LA(1);
00178 if( la_1 == EOF_CHAR || set.member(la_1) )
00179 break;
00180 consume();
00181 }
00182 }
00183
00185 virtual unsigned int mark()
00186 {
00187 return inputState->getInput().mark();
00188 }
00190 virtual void rewind(unsigned int pos)
00191 {
00192 inputState->getInput().rewind(pos);
00193 }
00194
00196 virtual void match(int c)
00197 {
00198 int la_1 = LA(1);
00199 if ( la_1 != c )
00200 throw MismatchedCharException(la_1, c, false, this);
00201 consume();
00202 }
00203
00207 virtual void match(const BitSet& b)
00208 {
00209 int la_1 = LA(1);
00210
00211 if ( !b.member(la_1) )
00212 throw MismatchedCharException( la_1, b, false, this );
00213 consume();
00214 }
00215
00219 virtual void match( const char* s )
00220 {
00221 while( *s != '\0' )
00222 {
00223
00224 int la_1 = LA(1), c = (*s++ & 0xFF);
00225
00226 if ( la_1 != c )
00227 throw MismatchedCharException(la_1, c, false, this);
00228
00229 consume();
00230 }
00231 }
00235 virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
00236 {
00237 size_t len = s.length();
00238
00239 for (size_t i = 0; i < len; i++)
00240 {
00241
00242 int la_1 = LA(1), c = (s[i] & 0xFF);
00243
00244 if ( la_1 != c )
00245 throw MismatchedCharException(la_1, c, false, this);
00246
00247 consume();
00248 }
00249 }
00253 virtual void matchNot(int c)
00254 {
00255 int la_1 = LA(1);
00256
00257 if ( la_1 == c )
00258 throw MismatchedCharException(la_1, c, true, this);
00259
00260 consume();
00261 }
00265 virtual void matchRange(int c1, int c2)
00266 {
00267 int la_1 = LA(1);
00268
00269 if ( la_1 < c1 || la_1 > c2 )
00270 throw MismatchedCharException(la_1, c1, c2, false, this);
00271
00272 consume();
00273 }
00274
00275 virtual bool getCaseSensitive() const
00276 {
00277 return caseSensitive;
00278 }
00279
00280 virtual void setCaseSensitive(bool t)
00281 {
00282 caseSensitive = t;
00283 }
00284
00285 virtual bool getCaseSensitiveLiterals() const=0;
00286
00288 virtual int getLine() const
00289 {
00290 return inputState->line;
00291 }
00292
00294 virtual void setLine(int l)
00295 {
00296 inputState->line = l;
00297 }
00298
00300 virtual int getColumn() const
00301 {
00302 return inputState->column;
00303 }
00305 virtual void setColumn(int c)
00306 {
00307 inputState->column = c;
00308 }
00309
00311 virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
00312 {
00313 return inputState->filename;
00314 }
00316 virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
00317 {
00318 inputState->filename = f;
00319 }
00320
00321 virtual bool getCommitToPath() const
00322 {
00323 return commitToPath;
00324 }
00325
00326 virtual void setCommitToPath(bool commit)
00327 {
00328 commitToPath = commit;
00329 }
00330
00332 virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
00333 {
00334 return text;
00335 }
00336
00337 virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
00338 {
00339 text = s;
00340 }
00341
00342 virtual void resetText()
00343 {
00344 text = "";
00345 inputState->tokenStartColumn = inputState->column;
00346 inputState->tokenStartLine = inputState->line;
00347 }
00348
00349 virtual RefToken getTokenObject() const
00350 {
00351 return _returnToken;
00352 }
00353
00357 virtual void newline()
00358 {
00359 ++inputState->line;
00360 inputState->column = 1;
00361 }
00362
00367 virtual void tab()
00368 {
00369 int c = getColumn();
00370 int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1;
00371 setColumn( nc );
00372 }
00374 int setTabsize( int size )
00375 {
00376 int oldsize = tabsize;
00377 tabsize = size;
00378 return oldsize;
00379 }
00381 int getTabSize() const
00382 {
00383 return tabsize;
00384 }
00385
00387 virtual void reportError(const RecognitionException& e);
00388
00390 virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
00391
00393 virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
00394
00395 virtual InputBuffer& getInputBuffer()
00396 {
00397 return inputState->getInput();
00398 }
00399
00400 virtual LexerSharedInputState getInputState()
00401 {
00402 return inputState;
00403 }
00404
00407 virtual void setInputState(LexerSharedInputState state)
00408 {
00409 inputState = state;
00410 }
00411
00413 virtual void setTokenObjectFactory(factory_type factory)
00414 {
00415 tokenFactory = factory;
00416 }
00417
00421 virtual int testLiteralsTable(int ttype) const
00422 {
00423 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
00424 if (i != literals.end())
00425 ttype = (*i).second;
00426 return ttype;
00427 }
00428
00434 virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
00435 {
00436 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
00437 if (i != literals.end())
00438 ttype = (*i).second;
00439 return ttype;
00440 }
00441
00443 virtual int toLower(int c) const
00444 {
00445
00446
00447
00448 return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
00449 }
00450
00466 virtual void uponEOF()
00467 {
00468 }
00469
00471 virtual void traceIndent();
00472 virtual void traceIn(const char* rname);
00473 virtual void traceOut(const char* rname);
00474
00475 #ifndef NO_STATIC_CONSTS
00476 static const int EOF_CHAR = EOF;
00477 #else
00478 enum {
00479 EOF_CHAR = EOF
00480 };
00481 #endif
00482 protected:
00483 ANTLR_USE_NAMESPACE(std)string text;
00484
00485 bool saveConsumedInput;
00486 factory_type tokenFactory;
00487 bool caseSensitive;
00488 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals;
00489
00490 RefToken _returnToken;
00491
00493 LexerSharedInputState inputState;
00494
00499 bool commitToPath;
00500
00501 int tabsize;
00502
00504 virtual RefToken makeToken(int t)
00505 {
00506 RefToken tok = tokenFactory();
00507 tok->setType(t);
00508 tok->setColumn(inputState->tokenStartColumn);
00509 tok->setLine(inputState->tokenStartLine);
00510 return tok;
00511 }
00512
00515 class Tracer {
00516 private:
00517 CharScanner* parser;
00518 const char* text;
00519
00520 Tracer(const Tracer& other);
00521 Tracer& operator=(const Tracer& other);
00522 public:
00523 Tracer( CharScanner* p,const char* t )
00524 : parser(p), text(t)
00525 {
00526 parser->traceIn(text);
00527 }
00528 ~Tracer()
00529 {
00530 parser->traceOut(text);
00531 }
00532 };
00533
00534 int traceDepth;
00535 private:
00536 CharScanner( const CharScanner& other );
00537 CharScanner& operator=( const CharScanner& other );
00538
00539 #ifndef NO_STATIC_CONSTS
00540 static const int NO_CHAR = 0;
00541 #else
00542 enum {
00543 NO_CHAR = 0
00544 };
00545 #endif
00546 };
00547
00548 inline int CharScanner::LA(unsigned int i)
00549 {
00550 int c = inputState->getInput().LA(i);
00551
00552 if ( caseSensitive )
00553 return c;
00554 else
00555 return toLower(c);
00556 }
00557
00558 inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
00559 {
00560 if (scanner->getCaseSensitiveLiterals())
00561 return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
00562 else
00563 {
00564 #ifdef NO_STRCASECMP
00565 return (stricmp(x.c_str(),y.c_str())<0);
00566 #else
00567 return (strcasecmp(x.c_str(),y.c_str())<0);
00568 #endif
00569 }
00570 }
00571
00572 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
00573 }
00574 #endif
00575
00576 #endif //INC_CharScanner_hpp__