#include "FilterGroup.h"

#include <string>

using namespace std;
using namespace regex;

//NOTE(review): the template argument lists in this file (vector<...>, pair<...>) were restored by hand.
//The element type of the output vectors (vector<char*>) in particular must be confirmed against
//the declarations in FilterGroup.h before merging.

//---------------------------------------------------- helpers --------------------------------------
//Expands the "\N" back-references of sReplacement with the groups captured in results.
//  sReplacement: zero terminated replacement template (must not be 0)
//  subject:      the string that was handed to match(); rstart() offsets are applied to it
//  results:      the match results of the current match
//Returns a malloc'd, zero terminated string that the caller must free.
//A backslash followed by a character that is not a captured group number is removed together with
//that character (as before). A lone backslash at the very end of the template is copied literally:
//the previous inline code underflowed its length calculation in that case, and for two consecutive
//backslashes, and handed a huge size to memcpy.
static char *expandBackrefs(const char *sReplacement, const char *subject, match_results_c &results) {
  const size_t numBackrefs = results.cbackrefs(); //there is always 1 backref available at least (\0)
  const char *from;
  size_t outlen = 0;

  //pass 1: calculate the expanded length
  for (from = sReplacement; *from;) {
    if (*from == '\\' && from[1]) {
      const size_t i = (size_t)from[1] - 48; //backref number (0-9)
      if (i < numBackrefs) outlen += results.rlength(i);
      from += 2;
    } else {
      outlen++;
      from++;
    }
  }

  //pass 2: copy literals and group values
  char *out = (char*)mallocCheck(outlen + 1);
  char *to = out;
  for (from = sReplacement; *from;) {
    if (*from == '\\' && from[1]) {
      const size_t i = (size_t)from[1] - 48; //backref number (0-9)
      if (i < numBackrefs) {
        //optional groups that did not take part in the match have 0 length: nothing to copy
        const size_t len = results.rlength(i);
        if (len) {
          memcpy(to, subject + results.rstart(i), len);
          to += len;
        }
      }
      from += 2;
    } else *to++ = *from++;
  }
  *to = 0;

  return out;
}

//---------------------------------------------------- Filter --------------------------------------
//Takes private copies of all strings and compiles the patterns immediately.
//parameters (may be 0) are substituted into the patterns at compile time, see parameterise()
Filter::Filter(const int iFilterId, const int iRunOrder, const char *sFilterRegEx, const char *sReplacement, const char *sFilterRegExExceptions, const char *sDescription, const REGEX_FLAGS iFlags, const bool bMultipass, const StringMap *parameters):
  m_iFilterId(iFilterId),
  m_iRunOrder(iRunOrder),
  //these are all short dups, Filters are low quantity objects and are used from everywhere so strdup(all)
  m_sFilterRegEx(strdupCheck(sFilterRegEx)),
  m_sFilterRegExExceptions(strdupCheck(sFilterRegExExceptions)),
  m_sReplacement(strdupCheck(sReplacement)),
  m_sDescription(strdupCheck(sDescription)),
  m_iFlags(iFlags),
  m_bMultipass(bMultipass)
{
  compile(parameters); //auto-compile on construct
}

//Deletes the compiled patterns and frees the string copies made in the constructor
Filter::~Filter() {
  if (m_rgx)                    {delete m_rgx; m_rgx = 0;}
  if (m_rgxExceptions)          {delete m_rgxExceptions; m_rgxExceptions = 0;}
  if (m_sFilterRegEx)           {free((char*)m_sFilterRegEx); m_sFilterRegEx = 0;}
  if (m_sFilterRegExExceptions) {free((char*)m_sFilterRegExExceptions); m_sFilterRegExExceptions = 0;}
  if (m_sReplacement)           {free((char*)m_sReplacement); m_sReplacement = 0;}
  if (m_sDescription)           {free((char*)m_sDescription); m_sDescription = 0;}
}

//Returns a malloc'd one line description of the filter (patterns, replacement and flags).
//caller frees result
const char *Filter::details() const {
  const size_t iBufferSize = 1024;
  char *details = (char*)mallocCheck(iBufferSize);
  _SNPRINTF(details, iBufferSize - 1, "Filter:(%s)=[%s],[%s] -> [%s] (%s %s %s %s)",
    (m_sDescription ? m_sDescription:"!desc"),
    (m_sFilterRegEx ? m_sFilterRegEx:"!filter"),
    (m_sFilterRegExExceptions ? m_sFilterRegExExceptions:"!except"),
    (m_sReplacement ? m_sReplacement:"!replace"),
    (m_iFlags & NOCASE ? "CI" : "-"),     //CI - case insensitive
    (m_iFlags & MULTILINE ? "ML" : "-"),  //ML - multiline
    (m_iFlags & SINGLELINE ? "DA" : "-"), //DA - dot matches all
    (m_bMultipass ? "MP" : "-")           //MP - Multipass
  );
  //the last byte is never written by the call above: terminate explicitly in case _SNPRINTF maps
  //to a variant that does not zero terminate a truncated result
  details[iBufferSize - 1] = 0;
  return details;
}

//(Re)sets the reference count to 1 and compiles the main and the exception pattern.
//An empty or missing pattern leaves the corresponding compiled pattern at 0.
//Returns true if there is a usable main pattern
const bool Filter::compile(const StringMap *parameters) {
  m_iReferences = 0;
  addRef();

  //parameter substitution (note that the input and output can be equal (not malloc'd) if there are no parameters)
  const char *sFilterRegEx, *sFilterRegExExceptions;
  parameterise(m_sFilterRegEx, parameters, &sFilterRegEx);
  parameterise(m_sFilterRegExExceptions, parameters, &sFilterRegExExceptions);

  //compilation
  //  NOFLAGS       = 0x0000,
  //  NOCASE        = 0x0001, // ignore case
  //  GLOBAL        = 0x0002, // match everywhere in the string
  //  MULTILINE     = 0x0004, // ^ and $ can match internal line breaks
  //  SINGLELINE    = 0x0008, // . can match newline character
  //  RIGHTMOST     = 0x0010, // start matching at the right of the string
  //  NOBACKREFS    = 0x0020, // only meaningful when used with GLOBAL and substitute
  //  FIRSTBACKREFS = 0x0040, // only meaningful when used with GLOBAL
  //  ALLBACKREFS   = 0x0080, // only meaningful when used with GLOBAL
  //  NORMALIZE     = 0x0100, // Preprocess patterns: "\\n" => "\n", etc.
  //  EXTENDED      = 0x0200, // ignore whitespace in pattern
  REGEX_FLAGS standard = NORMALIZE;
  if (sFilterRegEx && *sFilterRegEx) m_rgx = new rpattern_c(sFilterRegEx, "", m_iFlags|standard, GRETA_MATCHMODE);
  else m_rgx = 0; //to indicate that the regex is not to be used
  if (sFilterRegExExceptions && *sFilterRegExExceptions) m_rgxExceptions = new rpattern_c(sFilterRegExExceptions, "", m_iFlags|standard, MODE_DEFAULT);
  else m_rgxExceptions = 0; //to indicate that the regex is not to be used

  //free parameter subs
  if (sFilterRegEx && sFilterRegEx != m_sFilterRegEx) {free((void*)sFilterRegEx); sFilterRegEx = 0;}
  if (sFilterRegExExceptions && sFilterRegExExceptions != m_sFilterRegExExceptions) {free((void*)sFilterRegExExceptions); sFilterRegExExceptions = 0;}

  return (m_rgx != 0);
}

//Substitutes every $PARA_METER ($ followed by at least one of A-Z or _) in input with its value
//from parameters; names that are not in the map are replaced with nothing.
//  *finaloutput == input   when input or parameters is 0 (nothing malloc'd)
//  *finaloutput is malloc'd otherwise and must be freed by the caller
//Returns true when *finaloutput is input itself.
//input is never written to: replace() calls this on the shared m_sReplacement on every call and
//has to stay re-entrant, so the parameter name is copied for the lookup instead of temporarily
//zero terminating it in place
const bool Filter::parameterise(const char *input, const StringMap *parameters, const char **finaloutput) const {
  *finaloutput = input; //in case there are no parameters

  if (input && parameters) {
    //calculate new length and remember where the parameters are
    size_t newoutlen = strlen(input);
    vector<parameterLoc> ps;
    const char *backref = 0, *backrefend = input;

    while ((backref = strchr(backrefend, '$'))) { //find the next parameter (could be a line end symbol ($) also)
      //backrefend will point to one *past* the end of the PARA_METER after this loop
      backrefend = backref + 1;
      while ((*backrefend >= 'A' && *backrefend <= 'Z') || *backrefend == '_') backrefend++;

      //$ must be followed by at least 1 A-Z to be a parameter
      //usually $, the end-of-line will be *near* the end of the expression anyway or (...$)
      if (backrefend >= backref + 2) {
        const string sName(backref + 1, backrefend);
        const char *replacement = 0;
        StringMap::const_iterator i = parameters->find(sName.c_str()); //look for the parameter
        if (i != parameters->end()) replacement = i->second;
        if (!replacement) replacement = "";

        const size_t replacelen = strlen(replacement);
        newoutlen += replacelen - (backrefend - backref);
        //the cast only mirrors the type the end pointer had before; nothing is written through it
        parameterLoc p = {backref, (char*)backrefend, replacement, replacelen};
        ps.push_back(p);
      }
    }

    //create and substitute
    char *output = (char*)mallocCheck(newoutlen + 1);
    char *outputTopos = output;
    const char *outputFrompos = input;
    for (size_t k = 0; k < ps.size(); k++) {
      const parameterLoc &p = ps[k];
      memcpy(outputTopos, outputFrompos, p.rstart - outputFrompos); //pre from input into output
      outputTopos += p.rstart - outputFrompos;
      outputFrompos = p.rend;
      memcpy(outputTopos, p.replacement, p.replacelen); //replacement from parameters into output
      outputTopos += p.replacelen;
    }
    strcpy(outputTopos, outputFrompos); //the rest of input, including the terminator
    *finaloutput = output;
  }

  return (*finaloutput == input);
}

//Returns the count reported by the main pattern for input, 0 if there is no main pattern
const size_t Filter::count(const char *input) {
  return (m_rgx ? m_rgx->count(input) : 0);
}

//Matches input once and appends a malloc'd copy of every back-reference (\0 first) to vSubMatchesOut.
//caller must free the contents of vSubMatchesOut, but the caller must create vSubMatchesOut
//Returns true if the pattern matched; false also when there is no main pattern
//NOTE(review): vector element type restored by hand, confirm against FilterGroup.h
bool Filter::submatches(const char *input, vector<char*> *vSubMatchesOut) const {
  if (!m_rgx || !vSubMatchesOut) return false;

  match_results_c results;         //backref replacement
  rpattern_c::backref_type br = m_rgx->match(input, results); //match and position
  const bool matched = br.matched;

  if (matched) {
    //there is always 1 backref available at least (\0)
    for (size_t i = 0; i < results.cbackrefs(); i++) {
      const size_t len = results.rlength(i);
      char *submatch = (char*)mallocCheck(len + 1);
      //sometimes backrefs are not matched because they are in optional areas (()|()) and are null pointers, 0 length
      //in this case a valid 1 length malloc empty string will be created
      if (len) memcpy(submatch, input + results.rstart(i), len);
      submatch[len] = 0;
      vSubMatchesOut->push_back(submatch);
    }
  }

  return matched;
}

//Replaces every non-empty match of the main pattern in input with the (parameterised,
//back-reference expanded) replacement string, unless the exception pattern counts a hit in input.
//  *finaloutput == input when nothing was replaced, otherwise it is malloc'd
//  caller needs to free output if not equal to input
//With multipass the result is searched again until a pass replaces nothing or the pass limit
//(_MULTIPASSMAX) is used up.
//Returns the total number of replacements made
const size_t Filter::replace(const char *input, const StringMap *parameters, const char **finaloutput, const size_t refReserve) {
  //note that no replacement string in findAll() means to include the entire match in the result vector like "\0"
  //in replace(), no replacement will cause the match to be removed from the output like ""
  //needs to be re-entrant: multiple threads will be using the Filters concurrently
  //thus: all local non-const variables on the thread stack
  const char *output = input; //if no replacements are made then output=input
  size_t numReplacements = 0;

  if (input && m_rgx && (!m_rgxExceptions || !m_rgxExceptions->count(input))) { //count the exceptions
    //parameter substitution into the replacement string (done every time the function is called: reentrant)
    const char *sReplacement = 0;
    if (m_sReplacement) parameterise(m_sReplacement, parameters, &sReplacement);

    match_results_c results;
    rpattern_c::backref_type br;
    vector<pair<rpattern_c::backref_type, char*> > brs; //matches found, each with its expanded replacement
    unsigned int iPasses = _MULTIPASSMAX;

    do { //multipass
      brs.clear(); //new matches vector
      const char *searchstart = output;
      const size_t outlen = strlen(output);
      size_t newoutlen = outlen; //equal if no matches

      //pass 1: collect all the occurrences of the pattern and calculate the total output length
      br = m_rgx->match(searchstart, results);
      while (br.matched && br.second > br.first) {
        //no replacement string: the match is replaced with nothing
        char *replaceOut = expandBackrefs((sReplacement ? sReplacement : ""), searchstart, results);
        newoutlen += strlen(replaceOut) - (br.second - br.first); //add replacement length, subtract backref 0 length
        brs.push_back(make_pair(br, replaceOut)); //save to iterate through again after
        searchstart = br.second;
        br = m_rgx->match(searchstart, results); //next match?
      }

      //pass 2: build the new string (only when something is actually replaced, so that an
      //unchanged result stays pointer-equal to input)
      if (brs.size()) {
        char *newoutput = (char*)mallocCheck(newoutlen + 1); //plus trailing zero
        const char *copyposfrom = output;
        char *copyposto = newoutput;
        for (size_t k = 0; k < brs.size(); k++) {
          const rpattern_c::backref_type &brFound = brs[k].first;
          char *replaceOut = brs[k].second;
          const size_t prelen = brFound.first - copyposfrom;
          const size_t newReplacelen = strlen(replaceOut);
          memcpy(copyposto, copyposfrom, prelen);        //pre
          copyposto += prelen;
          memcpy(copyposto, replaceOut, newReplacelen);  //replacement
          copyposto += newReplacelen;
          copyposfrom = brFound.second;
          free((void*)replaceOut);
        }
        memcpy(copyposto, copyposfrom, output + outlen - copyposfrom); //copy end
        newoutput[newoutlen] = 0; //zero complete string

        //prepare for the next iteration
        if (output != input) free((void*)output);
        output = newoutput; //swap to new output
        numReplacements += brs.size();
      }
    } while (m_bMultipass && brs.size() && iPasses--);

    //parameterise will not malloc a new string if there are no parameters
    if (sReplacement && sReplacement != m_sReplacement) {free((char*)sReplacement); sReplacement = 0;}
  }

  *finaloutput = output;
  return numReplacements;
}

//Returns true if the main pattern counts at least one hit in input and the exception pattern (if any) counts none
const bool Filter::match(const char *input) const {
  /*
  #ifdef _DEBUG
  if (!m_rgx) DEBUGPRINT("Filter:match [%s]", DEBUG_LINE, "no main pattern");
  else if (m_rgx->count(input)) DEBUGPRINT("Filter:match [%s]", DEBUG_LINE, "main pattern matched");
  if (!m_rgxExceptions) DEBUGPRINT("Filter:match [%s]", DEBUG_LINE, "no exception pattern");
  else if (m_rgxExceptions->count(input)) DEBUGPRINT("Filter:match [%s]", DEBUG_LINE, "exception pattern matched");
  #endif
  */
  return (const bool)((m_rgx && m_rgx->count(input)) && (!m_rgxExceptions || !m_rgxExceptions->count(input)));
}

//Appends one malloc'd string per non-empty match of the main pattern in input to vMatchesOut:
//the expanded replacement string, or the matched text itself if the filter has no replacement.
//With unique, a string that is already in vMatchesOut is not added again.
//caller must free the contents of vMatchesOut, however vMatchesOut is created by the caller
//Returns the resulting size of vMatchesOut (0 if vMatchesOut is 0)
//NOTE(review): vector element type restored by hand, confirm against FilterGroup.h
const size_t Filter::findAll(const char *input, const StringMap *parameters, vector<char*> *vMatchesOut, const bool unique) const {
  //note that no replacement string in findAll() means to include the entire match in the result vector like "\0"
  //in replace(), no replacement will cause the match to be removed from the output like ""
  if (!vMatchesOut) return 0;
  if (!m_rgx || !input) return vMatchesOut->size(); //no main pattern: nothing can be found

  match_results_c results;        //backref replacement
  rpattern_c::backref_type br;    //match and position

  //parameter substitution into the replacement string (done every time the function is called: reentrant)
  const char *sReplacement = 0;
  parameterise(m_sReplacement, parameters, &sReplacement);

  //iterate through all the occurrences of the pattern in the input string
  const char *searchstart = input;
  br = m_rgx->match(searchstart, results);
  while (br.matched && br.second > br.first) {
    char *replaceOut = 0;
    if (!sReplacement) {
      //no replacement: simply include the input
      const size_t matchlen = br.second - br.first;
      replaceOut = (char*)mallocCheck(matchlen + 1);
      memcpy(replaceOut, br.first, matchlen);
      replaceOut[matchlen] = 0;
    } else {
      //we have a (potential backrefs) replacement string
      //should always have backrefs otherwise there will be a vector of equal strings with no content from input!
      replaceOut = expandBackrefs(sReplacement, searchstart, results);
    }

    if (unique) {
      size_t k = 0;
      while (k < vMatchesOut->size() && _STRCMP(replaceOut, (*vMatchesOut)[k])) k++; //while not at end and not = search
      if (k == vMatchesOut->size()) vMatchesOut->push_back(replaceOut);
      else free((void*)replaceOut); //already in the results: the duplicate has no other owner
    } else vMatchesOut->push_back(replaceOut);

    searchstart = br.second;
    br = m_rgx->match(searchstart, results); //next match?
  }

  //parameterise will not malloc a new string if there are no parameters
  if (sReplacement && sReplacement != m_sReplacement) free((void*)sReplacement);

  return vMatchesOut->size();
}

//---------------------------------------------------- FilterGroup --------------------------------------
//Releases all the Filters held by the group
FilterGroup::~FilterGroup() {
  clearFilters();
}

//Releases every Filter in the group and empties the group. Always returns 0
const int FilterGroup::clearFilters() {
  for (iterator iM = begin(); iM != end(); iM++) iM->second->release();
  clear();
  return 0;
}

//Returns a Filter stored under the key iRunOrder, or 0 if there is none
Filter *FilterGroup::filter(const int iRunOrder) const {
  const_iterator iFilter = find(iRunOrder);
  return (iFilter != end() ? iFilter->second : 0);
}

//Adds a single Filter under a key derived from its runOrder() according to iConflictMode.
//No addRef() is made here, while clearFilters() releases every entry.
//Always returns 0
//NOTE(review): with conflict_ignoreNew an ignored pFilter is neither stored nor released and the
//return value does not report it: confirm who owns the Filter in that case
const int FilterGroup::addFilter(Filter *pFilter, const enmConflictMode iConflictMode) {
  //for adding a single filter, FilterGroup will addRef and release the Filters where necessary
  const int iKey = pFilter->runOrder();

  switch (iConflictMode) {
    case conflict_includeAll: {
      insert(pair<int, Filter*>(iKey, pFilter));
      break;
    }
    case conflict_overwriteOld: {
      //overwrite any keys that already exist with this runOrder
      //the replaced Filters are released, as merge() does, otherwise their reference is lost
      for (iterator iOld = find(iKey); iOld != end(); iOld = find(iKey)) {
        if (iOld->second != pFilter) iOld->second->release();
        erase(iOld);
      }
      insert(pair<int, Filter*>(iKey, pFilter));
      break;
    }
    case conflict_ignoreNew: {
      //insert ignored if the key already exists
      if (find(iKey) == end()) insert(pair<int, Filter*>(iKey, pFilter));
      break;
    }
    case conflict_promoteNew: {
      //new key is moved above the highest existing key (same offset as merge())
      const int iMaxKey = (size() ? last()->first : 0);
      const int iDiff = iMaxKey + 1;
      insert(pair<int, Filter*>(iKey + iDiff, pFilter));
      break;
    }
    case conflict_demoteNew: {
      //new key is moved just below the lowest existing key (same offset as merge() for one incoming key)
      const int iMinExistKey = (size() ? begin()->first : 0);
      const int iDiff = -(iKey - iMinExistKey + 1);
      insert(pair<int, Filter*>(iKey + iDiff, pFilter));
      break;
    }
  }

  return 0;
}

//Runs replace() of every Filter in key order, feeding each one the output of the previous one.
//Intermediate strings are freed here.
//caller frees output unless (null or = input)
//Returns the total number of replacements made by all Filters
const size_t FilterGroup::replace(const char *input, const StringMap *parameters, const char **output) {
  size_t numReplacements = 0;
  const char *originalinput = input;
  *output = input; //don't free output and input if they are equal on exit

  for (const_iterator iM = begin(); iM != end(); iM++) {
    numReplacements += iM->second->replace(input, parameters, output);
    //caller frees input (and output unless they are equal)
    if (input != originalinput && *output != input) free((void*)input); //don't free the original input or input=output
    input = *output;
  }

  return numReplacements;
}

//Runs findAll() of every Filter on input, collecting into vAllMatches.
//caller frees input, and all the char*s in the vAllMatches vector
//Returns the resulting size of vAllMatches
//NOTE(review): vector element type restored by hand, confirm against FilterGroup.h
const size_t FilterGroup::findAll(const char *input, const StringMap *parameters, vector<char*> *vAllMatches, const bool unique) const {
  for (const_iterator iM = begin(); iM != end(); iM++) iM->second->findAll(input, parameters, vAllMatches, unique);
  return vAllMatches->size();
}

//Returns true if every Filter in the group matches input (true for an empty group)
const bool FilterGroup::matchAll(const char *input) const {
  //caller manages input
  for (const_iterator iM = begin(); iM != end(); iM++) if (!iM->second->match(input)) return false;
  return true;
}

//Returns true if at least one Filter in the group matches input (false for an empty group)
const bool FilterGroup::matchAny(const char *input) const {
  //caller manages input
  for (const_iterator iM = begin(); iM != end(); iM++) if (iM->second->match(input)) return true;
  return false;
}

//Copies the entries of fgFilterGroup into this group according to iConflictMode.
//Every Filter that is inserted gets an addRef(), so both groups release their own reference.
//Returns the resulting size of this group
const size_t FilterGroup::merge(FilterGroup *fgFilterGroup, const enmConflictMode iConflictMode) {
  iterator iMIncoming;

  switch (iConflictMode) {
    case conflict_includeAll: {
      //multimap accepts multiple values for a given key
      DEBUGPRINT("filter merge (%s)", DEBUG_CHECK, "includeAll");
      for (iMIncoming = fgFilterGroup->begin(); iMIncoming != fgFilterGroup->end(); iMIncoming++) {
        insert(*iMIncoming);
        iMIncoming->second->addRef();
      }
      break;
    }
    case conflict_overwriteOld: {
      //overwrite any keys that already exist with this runOrder
      DEBUGPRINT("filter merge (%s)", DEBUG_CHECK, "overwriteOld");
      iterator iMLocal;
      for (iMIncoming = fgFilterGroup->begin(); iMIncoming != fgFilterGroup->end(); iMIncoming++) {
        //delete Filter if it exists
        iMLocal = find(iMIncoming->first);
        if (iMLocal != end()) {
          iMLocal->second->release();
          erase(iMLocal);
        }
        insert(*iMIncoming);
        iMIncoming->second->addRef();
      }
      break;
    }
    case conflict_ignoreNew: {
      //insert ignored if the key already exists
      //it will be dereferenced and deleted in the destructor of the incoming FilterGroup
      DEBUGPRINT("filter merge (%s)", DEBUG_CHECK, "ignoreNew");
      for (iMIncoming = fgFilterGroup->begin(); iMIncoming != fgFilterGroup->end(); iMIncoming++) {
        if (find(iMIncoming->first) == end()) {
          insert(*iMIncoming);
          iMIncoming->second->addRef();
        }
      }
      break;
    }
    case conflict_promoteNew: {
      //promotes the keys, not the runOrders
      DEBUGPRINT("filter merge (%s)", DEBUG_CHECK, "promoteNew");
      int iMaxKey = (size() ? last()->first : 0);
      int iDiff = iMaxKey + 1;
      for (iMIncoming = fgFilterGroup->begin(); iMIncoming != fgFilterGroup->end(); iMIncoming++) {
        insert(pair<int, Filter*>(iMIncoming->first + iDiff, iMIncoming->second));
        iMIncoming->second->addRef();
      }
      break;
    }
    case conflict_demoteNew: {
      //demotes the keys, not the filter runOrders
      DEBUGPRINT("filter merge (%s)", DEBUG_CHECK, "demoteNew");
      int iMinExistKey = (size() ? begin()->first : 0);
      int iMaxInKey = (fgFilterGroup->size() ? fgFilterGroup->last()->first : 0);
      int iDiff = -(iMaxInKey - iMinExistKey + 1);
      for (iMIncoming = fgFilterGroup->begin(); iMIncoming != fgFilterGroup->end(); iMIncoming++) {
        insert(pair<int, Filter*>(iMIncoming->first + iDiff, iMIncoming->second));
        iMIncoming->second->addRef();
      }
      break;
    }
  }

  //dereference the Filters in the incoming group so that the destructor does not delete the Filter
  DEBUG_RESULT_OK;
  return size();
}