#ifndef _FILTERGROUP #define _FILTERGROUP /* Filter Groups are for processing strings through many regular expressions. parameters can also be placed in the expressions (for flexible reuse) enum REGEX_FLAGS { NOFLAGS = 0x0000, NOCASE = 0x0001, // ignore case GLOBAL = 0x0002, // match everywhere in the string MULTILINE = 0x0004, // ^ and $ can match internal line breaks SINGLELINE = 0x0008, // . can match newline character RIGHTMOST = 0x0010, // start matching at the right of the string NOBACKREFS = 0x0020, // only meaningful when used with GLOBAL and substitute FIRSTBACKREFS = 0x0040, // only meaningful when used with GLOBAL ALLBACKREFS = 0x0080, // only meaningful when used with GLOBAL NORMALIZE = 0x0100, // Preprocess patterns: "\\n" => "\n", etc. EXTENDED = 0x0200, // ignore whitespace in pattern }; */ #define _MULTIPASSMAX 1000 #include "define.h" #include "define_platform.h" #include "debug.h" #include "extensions.h" #include "StringMap.h" #include #include #include #include #include "regexpr2.h" //here because of the ARRAYSIZE decleration in winnt.h using namespace std; using namespace regex; //-------------------------------------------- Filter ----------------------------------------------- /* Filter class has also an exceptions regEx which is used to provide exceptions to the main pattern this is because negative group word pattern matching is difficult in POSIX For example: [greenpages/*.html] with exception of [greenpages/.*(index|contents)\.html] Functioning: match() function matches the entire string on the main pattern where not matches the entire string on the exception count() number of matches in the input findAll() function gets all substrings and then ignores each substring that matches the exception (not the entire input string) submatches() the submatches in the first match replace() replaces all matches with registered replacement $PARAMS: note that $PARAMS in the main regex are compiled in at the start (and passed in) and static for all uses of the Filter. $PARAMS in the replacement are dynamicaly calculated on every use of the Filter Thus $RELATIVEHREF will be replaced each time in the replacement In order to be re-entrant the $PARAMS are passed into the call and held on the stack during matching */ class Filter { struct parameterLoc { const char *rstart; const char *rend; const char *replacement; size_t replacelen; }; //many threads/objects may use this filter at the same time (static for example) int m_iReferences; //number of objects that are using this (addRef()/release() like windows COM) const int m_iFilterId, m_iRunOrder; //the following have $PARAMS in them but are read only and //fixed for this object and all who use it const char *m_sFilterRegEx, *m_sFilterRegExExceptions; //$PARAMS are replaced on compilation (for all) const char *m_sReplacement; //copied and $PARAMS replaced in replace() (different each time) //read-only and fixed for this object and all who use it const char *m_sDescription; const REGEX_FLAGS m_iFlags; const bool m_bMultipass; //base regex objects rpattern_c *m_rgx, *m_rgxExceptions; //generic parameterise function that will do replacement, regex, or anything const bool parameterise(const char *input, const StringMap *parameters, const char **output) const; const bool compile(const StringMap *parameters = 0); const char *details() const; public: Filter(const int iFilterId, const int iRunOrder, const char *sFilterRegEx, const char *sReplacement=0, const char *sFilterRegExExceptions=0, const char *sDescription=0, const REGEX_FLAGS iFlags=NOFLAGS, const bool bMultipass=false, const StringMap *parameters=0); ~Filter(); //COM style reference counting const int addRef() {return m_iReferences++;} const int release() {if (--m_iReferences==0) delete this;return m_iReferences;} //Filters must be new'd friend int operator<<(ostream &os, Filter &fFilter) {const char *details = fFilter.details(); os << details; free((void*)details);} operator const char*() const {return details();} operator const int() const {return m_iRunOrder;} const bool compiled() const {return (m_rgx!=0);} //accessors const int filterId() const {return m_iFilterId;} const char *regEx() const {return m_sFilterRegEx;} const int runOrder() const {return m_iRunOrder;} const char *description() const {return m_sDescription;} //functions const size_t replace(const char *input, const StringMap *parameters, const char **output, const size_t refReserve=50); const size_t findAll(const char *input, const StringMap *parameters, vector *vMatchesOut, const bool unique = false) const; bool submatches(const char *input, vector *vSubMatchesOut) const; const size_t count(const char *input); const bool match(const char *haystack) const; }; //-------------------------------------------- FilterGroup ----------------------------------------------- //A map is used because it is automatically sorted (by runOrder). class FilterGroup: public multimap { int m_iStandardFilterGroupId; public: enum enmConflictMode { conflict_includeAll, conflict_overwriteOld, conflict_ignoreNew, conflict_promoteNew, conflict_demoteNew }; FilterGroup() {} FilterGroup(Filter *f) {addFilter(f);} FilterGroup(const char *regex) {addFilter(new Filter(1, 1, regex));} //FilterGroup deletes Filters at ~FilterGrop() ~FilterGroup(); iterator last() {return --end();} friend int operator<<(ostream &std, FilterGroup &fgFG) {for (const_iterator iM = fgFG.begin(); iM != fgFG.end(); iM++) {std << *iM->second;std << "\n";}} Filter *filter(const int iRunOrder = 0) const; //management const int addFilter(Filter *pFilter, const enmConflictMode iConflictMode=conflict_includeAll); const int clearFilters(); const size_t merge(FilterGroup *fgFilterGroup, const enmConflictMode iConflictMode=conflict_includeAll); //functions const size_t replace( const char *input, const StringMap *parameters, const char **output); const size_t findAll( const char *input, const StringMap *parameters, vector *vMatchesOut, const bool unique = false) const; const bool matchAll(const char *haystack) const; const bool matchAny(const char *haystack) const; }; #endif