Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members

FXRex.h

Go to the documentation of this file.
00001 /********************************************************************************
00002 *                                                                               *
00003 *                 R e g u l a r   E x p r e s s i o n   C l a s s               *
00004 *                                                                               *
00005 *********************************************************************************
00006 * Copyright (C) 1999,2009 by Jeroen van der Zijp.   All Rights Reserved.        *
00007 *********************************************************************************
00008 * This library is free software; you can redistribute it and/or modify          *
00009 * it under the terms of the GNU Lesser General Public License as published by   *
00010 * the Free Software Foundation; either version 3 of the License, or             *
00011 * (at your option) any later version.                                           *
00012 *                                                                               *
00013 * This library is distributed in the hope that it will be useful,               *
00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of                *
00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                 *
00016 * GNU Lesser General Public License for more details.                           *
00017 *                                                                               *
00018 * You should have received a copy of the GNU Lesser General Public License      *
00019 * along with this program.  If not, see <http://www.gnu.org/licenses/>          *
00020 *********************************************************************************
00021 * $Id: FXRex.h,v 1.63 2009/01/06 13:07:27 fox Exp $                             *
00022 ********************************************************************************/
00023 #ifndef FXREX_H
00024 #define FXREX_H
00025 
00026 
00027 namespace FX {
00028 
00029 
00030 /**
00031 * FXRex is a regular expression class implementing an NFA matcher.
00032 * It supports capturing parentheses, non-capturing parentheses,
00033 * positive or negative lookahead, backreferences, case-insensitive
00034 * matching, counted repetitions, lazy or greedy matches, and
00035 * PERL-like matching operators.
00036 * The subject string may be scanned forwards or backwards, and may
00037 * contain any of 256 possible character values.
00038 *
00039 * When parsing a regular expression pattern, the mode parameter is
00040 * the bitwise OR of a set of flags and affects the match algorithm.
00041 * Passing the flag Capture enables capturing parentheses
00042 * and back references. The flag IgnoreCase enables case-insensitive
00043 * matching. When the flag Newline is passed, newlines are treated
00044 * like normal characters; otherwise, newline is NOT matched
00045 * except when explicitly part of a character class. The flag
00046 * Verbatim disables all special character interpretation; Syntax performs a syntax check only.
00047 *
00048 * When matching a compiled pattern, the mode parameter is the
00049 * bitwise OR of a set of flags that affects how the match is
00050 * performed.  Passing the flag Backward causes the match
00051 * to proceed backwards through the subject string.  Passing the
00052 * flags NotBol and/or NotEol causes the begin and
00053 * end of the subject string NOT to be considered a line start
00054 * or line end. The flag NotEmpty causes a match to fail if
00055 * the empty string was matched.
00056 */
00057 class FXAPI FXRex {
00058 private:
00059   FXint *code;                              // Pattern data (presumably the compiled program); equals fallback while the expression is empty
00060 private:
00061   static const FXchar *const errors[];      // String table indexed by Error value; see getError()
00062   static const FXint fallback[];            // Shared data the default constructor points code at; see empty()
00063 public:
00064 
00065   /// Regular expression parse flags; combine with bitwise OR (values do not overlap the match flags)
00066   enum {
00067     Normal     = 0,     ///< Normal mode
00068     Capture    = 1,     ///< Perform capturing parentheses
00069     IgnoreCase = 2,     ///< Ignore case differences
00070     Newline    = 4,     ///< Match-any operators match newline too
00071     Verbatim   = 8,     ///< Disable interpretation of magic characters
00072     Syntax     = 16     ///< Perform syntax check only
00073     };
00074 
00075 
00076   /// Regular expression match flags; combine with bitwise OR (values do not overlap the parse flags)
00077   enum {
00078     Forward    = 0,     ///< Match scanning forward from offset
00079     Backward   = 32,    ///< Match scanning backward from offset
00080     NotBol     = 64,    ///< Start of string is NOT begin of line
00081     NotEol     = 128,   ///< End of string is NOT end of line
00082     NotEmpty   = 256    ///< Do not match empty
00083     };
00084 
00085   /// Regular expression error codes; the value is used as index by getError()
00086   enum Error {
00087     ErrOK      = 0,     ///< No errors
00088     ErrEmpty   = 1,     ///< Empty pattern
00089     ErrParent  = 2,     ///< Unmatched parenthesis
00090     ErrBracket = 3,     ///< Unmatched bracket
00091     ErrBrace   = 4,     ///< Unmatched brace
00092     ErrRange   = 5,     ///< Bad character range
00093     ErrEscape  = 6,     ///< Bad escape sequence
00094     ErrCount   = 7,     ///< Bad counted repeat
00095     ErrNoAtom  = 8,     ///< No atom preceding repetition
00096     ErrRepeat  = 9,     ///< Repeat following repeat
00097     ErrBackRef = 10,    ///< Bad backward reference
00098     ErrClass   = 11,    ///< Bad character class
00099     ErrComplex = 12,    ///< Expression too complex
00100     ErrMemory  = 13,    ///< Out of memory
00101     ErrToken   = 14,    ///< Illegal token
00102     ErrBehind  = 15     ///< Bad look-behind pattern
00103     };
00104 
00105 public:
00106 
00107   /// Construct empty regular expression object; code is set to the shared fallback, so empty() is true
00108   FXRex():code((FXint*)fallback){}
00109 
00110   /// Copy regular expression object
00111   FXRex(const FXRex& orig);
00112 
00113   /// Compile expression from pattern (C string); if error is not NULL, the error code is returned through it
00114   FXRex(const FXchar* pattern,FXint mode=Normal,Error* error=NULL);
00115 
00116   /// Compile expression from pattern (FXString); if error is not NULL, the error code is returned through it
00117   FXRex(const FXString& pattern,FXint mode=Normal,Error* error=NULL);
00118 
00119   /// Assign another regular expression to this one
00120   FXRex& operator=(const FXRex& orig);
00121 
00122   /**
00123   * See if regular expression is empty, i.e. code still equals fallback;
00124   * the regular expression will be empty when it is unable to parse
00125   * a pattern due to a syntax error.
00126   */
00127   FXbool empty() const { return (code==fallback); }
00128 
00129   /// Parse pattern (C string) using the parse flags in mode; return error code if syntax error is found
00130   FXRex::Error parse(const FXchar* pattern,FXint mode=Normal);
00131 
00132   /// Parse pattern (FXString) using the parse flags in mode; return error code if syntax error is found
00133   FXRex::Error parse(const FXString& pattern,FXint mode=Normal);
00134 
00135   /**
00136   * Match a subject string of length len; return true if a match is found and
00137   * false otherwise.  The whole match is reported in beg[0] and end[0]: beg[0] is
00138   * the position of the first matched character, end[0] the position just past the
00139   * last one.  Sub expressions from capturing parenthesis i are returned in beg[i] and end[i].
00140   * NOTE(review): npar, fm and to are undocumented; presumably the size of beg/end and the scan range -- confirm.
00141   */
00142   FXbool match(const FXchar* string,FXint len,FXint* beg=NULL,FXint* end=NULL,FXint mode=Forward,FXint npar=1,FXint fm=0,FXint to=2147483647) const;
00143 
00144   /// Search for match in a string; FXString overload of match() without the explicit length argument
00145   FXbool match(const FXString& string,FXint* beg=NULL,FXint* end=NULL,FXint mode=Forward,FXint npar=1,FXint fm=0,FXint to=2147483647) const;
00146 
00147   /**
00148   * After performing a regular expression match with capturing parentheses,
00149   * a substitution string is built from the replace string, where "&"
00150   * is replaced by the entire matched pattern, and "\1" through "\9" are
00151   * replaced by captured expressions.  The original source string and its
00152   * length, and the match arrays beg and end must be passed.
00153   */
00154   static FXString substitute(const FXchar* string,FXint len,FXint* beg,FXint* end,const FXString& replace,FXint npar=1);
00155 
00156   /// Return substitution string; FXString overload of substitute() without the explicit length argument
00157   static FXString substitute(const FXString& string,FXint* beg,FXint* end,const FXString& replace,FXint npar=1);
00158 
00159   /// Return the errors[] table entry (a string) for the given error code; err is used as index without a range check
00160   static const FXchar* getError(FXRex::Error err){ return errors[err]; }
00161 
00162   /// Comparison operators
00163   FXbool operator==(const FXRex& rex) const;
00164   FXbool operator!=(const FXRex& rex) const;
00165 
00166   /// Saving and loading
00167   friend FXAPI FXStream& operator<<(FXStream& store,const FXRex& s);
00168   friend FXAPI FXStream& operator>>(FXStream& store,FXRex& s);
00169 
00170   /// Destroy regular expression object
00171  ~FXRex();
00172   };
00173 
00174 /// Stream operators for FXRex; these are the functions befriended by the class under "Saving and loading"
00175 extern FXAPI FXStream& operator<<(FXStream& store,const FXRex& s);
00176 extern FXAPI FXStream& operator>>(FXStream& store,FXRex& s);
00177 
00178 }
00179 
00180 #endif

Copyright © 1997-2009 Jeroen van der Zijp