![]() |
Main Page Class Hierarchy Alphabetical List Compound List File List Compound Members
![]() |
00001 /******************************************************************************** 00002 * * 00003 * R e g u l a r E x p r e s s i o n C l a s s * 00004 * * 00005 ********************************************************************************* 00006 * Copyright (C) 1999,2009 by Jeroen van der Zijp. All Rights Reserved. * 00007 ********************************************************************************* 00008 * This library is free software; you can redistribute it and/or modify * 00009 * it under the terms of the GNU Lesser General Public License as published by * 00010 * the Free Software Foundation; either version 3 of the License, or * 00011 * (at your option) any later version. * 00012 * * 00013 * This library is distributed in the hope that it will be useful, * 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 00016 * GNU Lesser General Public License for more details. * 00017 * * 00018 * You should have received a copy of the GNU Lesser General Public License * 00019 * along with this program. If not, see <http://www.gnu.org/licenses/> * 00020 ********************************************************************************* 00021 * $Id: FXRex.h,v 1.63 2009/01/06 13:07:27 fox Exp $ * 00022 ********************************************************************************/ 00023 #ifndef FXREX_H 00024 #define FXREX_H 00025 00026 00027 namespace FX { 00028 00029 00030 /** 00031 * FXRex is a regular expression class implementing a NFA matcher. 00032 * It supports capturing parentheses, non-capturing parentheses, 00033 * positive or negative lookahead, backreferences, case-insensitive 00034 * matching, counted repetitions, lazy or greedy matches, and 00035 * PERL-like matching operators. 00036 * The subject string may be scanned forwards or backwards, and may 00037 * contain any of 256 possible character values. 00038 * 00039 * When parsing a regular expression pattern, the mode parameter is 00040 * the bitwise OR of a set of flags and affects the match algorithm. 00041 * Passing the flag Capture enables capturing parentheses 00042 * and back references. The flag IgnoreCase enables case-insensitive 00043 * matching. When the flag Newline is passed, newlines are treated 00044 * like normal characters; otherwise, newline is NOT matched 00045 * except when explicitly part of a character class. The flag 00046 * Verbatim disables all special character interpretation. 00047 * 00048 * When matching a compiled pattern, the mode parameter is the 00049 * bitwise OR of a set of flags that affects how the match is 00050 * performed. Passing the flag Backward causes the match 00051 * to proceed backwards through the subject string. Passing the 00052 * flags NotBol and/or NotEol causes the begin and 00053 * end of the subject string NOT to be considered a line start 00054 * or line end. The flag NotEmpty causes a match to fail if 00055 * the empty string was matched. 00056 */ 00057 class FXAPI FXRex { 00058 private: 00059 FXint *code; 00060 private: 00061 static const FXchar *const errors[]; 00062 static const FXint fallback[]; 00063 public: 00064 00065 /// Regular expression parse flags 00066 enum { 00067 Normal = 0, /// Normal mode 00068 Capture = 1, /// Perform capturing parentheses 00069 IgnoreCase = 2, /// Ignore case differences 00070 Newline = 4, /// Match-any operators match newline too 00071 Verbatim = 8, /// Disable interpretation of magic characters 00072 Syntax = 16 /// Perform syntax check only 00073 }; 00074 00075 00076 /// Regular expression match flags 00077 enum { 00078 Forward = 0, /// Match scanning forward from offset 00079 Backward = 32, /// Match scanning backward from offset 00080 NotBol = 64, /// Start of string is NOT begin of line 00081 NotEol = 128, /// End of string is NOT end of line 00082 NotEmpty = 256 /// Do not match empty 00083 }; 00084 00085 /// Regular expression error codes 00086 enum Error { 00087 ErrOK = 0, /// No errors 00088 ErrEmpty = 1, /// Empty pattern 00089 ErrParent = 2, /// Unmatched parenthesis 00090 ErrBracket = 3, /// Unmatched bracket 00091 ErrBrace = 4, /// Unmatched brace 00092 ErrRange = 5, /// Bad character range 00093 ErrEscape = 6, /// Bad escape sequence 00094 ErrCount = 7, /// Bad counted repeat 00095 ErrNoAtom = 8, /// No atom preceding repetition 00096 ErrRepeat = 9, /// Repeat following repeat 00097 ErrBackRef = 10, /// Bad backward reference 00098 ErrClass = 11, /// Bad character class 00099 ErrComplex = 12, /// Expression too complex 00100 ErrMemory = 13, /// Out of memory 00101 ErrToken = 14, /// Illegal token 00102 ErrBehind = 15 /// Bad look-behind pattern 00103 }; 00104 00105 public: 00106 00107 /// Construct empty regular expression object 00108 FXRex():code((FXint*)fallback){} 00109 00110 /// Copy regular expression object 00111 FXRex(const FXRex& orig); 00112 00113 /// Compile expression from pattern; if error is not NULL, error code is returned 00114 FXRex(const FXchar* pattern,FXint mode=Normal,Error* error=NULL); 00115 00116 /// Compile expression from pattern; if error is not NULL, error code is returned 00117 FXRex(const FXString& pattern,FXint mode=Normal,Error* error=NULL); 00118 00119 /// Assign another regular expression to this one 00120 FXRex& operator=(const FXRex& orig); 00121 00122 /** 00123 * See if regular expression is empty; the regular expression 00124 * will be empty when it is unable to parse a pattern due to 00125 * a syntax error. 00126 */ 00127 FXbool empty() const { return (code==fallback); } 00128 00129 /// Parse pattern, return error code if syntax error is found 00130 FXRex::Error parse(const FXchar* pattern,FXint mode=Normal); 00131 00132 /// Parse pattern, return error code if syntax error is found 00133 FXRex::Error parse(const FXString& pattern,FXint mode=Normal); 00134 00135 /** 00136 * Match a subject string of length len, returning true if a match is found 00137 * and false otherwise. The entire pattern is captured in beg[0] and end[0], 00138 * where beg[0] refers to the position of the first matched character and end[0] 00139 * refers to the position after the last matched character. 00140 * Sub expressions from capturing parenthesis i are returned in beg[i] and end[i]. 00141 */ 00142 FXbool match(const FXchar* string,FXint len,FXint* beg=NULL,FXint* end=NULL,FXint mode=Forward,FXint npar=1,FXint fm=0,FXint to=2147483647) const; 00143 00144 /// Search for match in a string 00145 FXbool match(const FXString& string,FXint* beg=NULL,FXint* end=NULL,FXint mode=Forward,FXint npar=1,FXint fm=0,FXint to=2147483647) const; 00146 00147 /** 00148 * After performing a regular expression match with capturing parentheses, 00149 * a substitution string is build from the replace string, where where "&" 00150 * is replaced by the entire matched pattern, and "\1" through "\9" are 00151 * replaced by captured expressions. The original source string and its 00152 * length, and the match arrays beg and end must be passed. 00153 */ 00154 static FXString substitute(const FXchar* string,FXint len,FXint* beg,FXint* end,const FXString& replace,FXint npar=1); 00155 00156 /// Return substitution string 00157 static FXString substitute(const FXString& string,FXint* beg,FXint* end,const FXString& replace,FXint npar=1); 00158 00159 /// Returns error code for given error 00160 static const FXchar* getError(FXRex::Error err){ return errors[err]; } 00161 00162 /// Comparison operators 00163 FXbool operator==(const FXRex& rex) const; 00164 FXbool operator!=(const FXRex& rex) const; 00165 00166 /// Saving and loading 00167 friend FXAPI FXStream& operator<<(FXStream& store,const FXRex& s); 00168 friend FXAPI FXStream& operator>>(FXStream& store,FXRex& s); 00169 00170 /// Delete 00171 ~FXRex(); 00172 }; 00173 00174 00175 extern FXAPI FXStream& operator<<(FXStream& store,const FXRex& s); 00176 extern FXAPI FXStream& operator>>(FXStream& store,FXRex& s); 00177 00178 } 00179 00180 #endif
![]() |