platform/include/pion/platform/Transform.hpp

00001 // ------------------------------------------------------------------------
00002 // Pion is a development platform for building Reactors that process Events
00003 // ------------------------------------------------------------------------
00004 // Copyright (C) 2007-2008 Atomic Labs, Inc.  (http://www.atomiclabs.com)
00005 //
00006 // Pion is free software: you can redistribute it and/or modify it under the
00007 // terms of the GNU Affero General Public License as published by the Free
00008 // Software Foundation, either version 3 of the License, or (at your option)
00009 // any later version.
00010 //
00011 // Pion is distributed in the hope that it will be useful, but WITHOUT ANY
00012 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00013 // FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for
00014 // more details.
00015 //
00016 // You should have received a copy of the GNU Affero General Public License
00017 // along with Pion.  If not, see <http://www.gnu.org/licenses/>.
00018 //
00019 
00020 #ifndef __PION_TRANSFORM_HEADER__
00021 #define __PION_TRANSFORM_HEADER__
00022 
00023 #include <cctype>
00024 #include <cstring>
00025 #include <set>
00026 #include <boost/regex.hpp>
00027 #include <boost/regex/icu.hpp>
00028 #include <boost/algorithm/string/compare.hpp>
00029 #include <boost/algorithm/string/predicate.hpp>
00030 #include <boost/tokenizer.hpp>
00031 #include <pion/PionAlgorithms.hpp>
00032 #include <pion/PionConfig.hpp>
00033 #include <pion/PionException.hpp>
00034 #include <pion/platform/Vocabulary.hpp>
00035 #include <pion/platform/Event.hpp>
00036 #include <pion/platform/Comparison.hpp>
00037 #include <pion/PionLogger.hpp>
00038 #include <pion/platform/ConfigManager.hpp>
00039 
00040 namespace pion {        // begin namespace pion
00041 namespace platform {    // begin namespace platform (Pion Platform Library)
00042 
00043 class PION_PLATFORM_API Transform
00044 {
00045 public:
00047     Vocabulary::Term            m_term;
00048 
00050     class MissingTransformField : public PionException {
00051     public:
00052         MissingTransformField(const std::string& str)
00053             : PionException("Invalid type of transformation: ", str) {}
00054     };
00055 
00057     class RegexFailure : public PionException {
00058     public:
00059         RegexFailure(const std::string& what)
00060             : PionException("Regex replace failed: ", what) {}
00061     };
00062 
00064     class ValueAssignmentException : public PionException {
00065     public:
00066         ValueAssignmentException(const std::string& value)
00067             : PionException("AssignValue failed for value: ", value) {}
00068     };
00069 
00070 //  mutable PionLogger          m_logger;
00071 
00073     virtual ~Transform() {}
00074 
00081     Transform(const Vocabulary& v, const Vocabulary::Term& term)
00082         :   m_term(term) //, m_logger(PION_GET_LOGGER("pion.Transform"))
00083     {
00084     }
00085 
00091     void updateVocabulary(const Vocabulary& v)
00092     {
00093         // assume that Term references never change
00094         m_term = v[m_term.term_ref];
00095     }
00096 
00102     inline void removeTerm(EventPtr& e)
00103     {
00104         e->clear(m_term.term_ref);
00105     }
00106 
00114     virtual bool transform(EventPtr& d, const EventPtr& s) = 0;
00115 
00117     static const std::string            LOOKUP_TERM_NAME;
00118     static const std::string            TERM_ELEMENT_NAME;
00119     static const std::string            LOOKUP_MATCH_ELEMENT_NAME;
00120     static const std::string            LOOKUP_FORMAT_ELEMENT_NAME;
00121     static const std::string            LOOKUP_DEFAULT_ELEMENT_NAME;
00122     static const std::string            VALUE_ELEMENT_NAME;
00123     static const std::string            RULES_STOP_ON_FIRST_ELEMENT_NAME;
00124     static const std::string            RULE_ELEMENT_NAME;
00125     static const std::string            TYPE_ELEMENT_NAME;
00126     static const std::string            TRANSFORMATION_SET_VALUE_NAME;
00127     static const std::string            LOOKUP_DEFAULTACTION_ELEMENT_NAME;
00128     static const std::string            LOOKUP_LOOKUP_ELEMENT_NAME;
00129     static const std::string            LOOKUP_KEY_ATTRIBUTE_NAME;
00130     static const std::string            SOURCE_TERM_ELEMENT_NAME;
00131     static const std::string            REGEXP_ELEMENT_NAME;
00132     static const std::string            REGEXP_ATTRIBUTE_NAME;
00133     static const std::string            SEP_ATTRIBUTE_NAME;
00134     static const std::string            UNIQ_ATTRIBUTE_NAME;
00135 };
00136 
00146 inline bool AssignValue(EventPtr& e, const Vocabulary::Term& term, const std::string& value)
00147 {
00148 // Seems that not assigning empty values breaks functionality & tests
00149 //  if (value.empty())      // New shortcut -- if empty value, don't assign
00150 //    return true;
00151 
00152     try {
00153         e->set(term, value);
00154     } catch (...) {
00155         throw Transform::ValueAssignmentException(value);
00156     }
00157     return true;
00158 }
00159 
00166 class PION_PLATFORM_API TransformAssignValue
00167     : public Transform
00168 {
00170     std::string                 m_tr_set_value;
00171 
00172 public:
00173 
00181     TransformAssignValue(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00182         : Transform(v, term)
00183     {
00184         // <Value>escape(value)</Value>
00185         std::string val;
00186         if (! ConfigManager::getConfigOptionEmptyOk(VALUE_ELEMENT_NAME, val, config_ptr))
00187             throw MissingTransformField("Missing Value in TransformationAssignValue");
00188         m_tr_set_value = val;
00189     }
00190 
00199     virtual bool transform(EventPtr& d, const EventPtr& s)
00200     {
00201         return AssignValue(d, m_term, m_tr_set_value);
00202     }
00203 };
00204 
00205 
00208 class PION_PLATFORM_API TransformAssignTerm
00209     : public Transform
00210 {
00212     Vocabulary::Term            m_src_term;
00213 
00214 public:
00215 
00223     TransformAssignTerm(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00224         : Transform(v, term)
00225     {
00226         // <Term>src-term</Term>
00227         std::string term_id;
00228         if (! ConfigManager::getConfigOption(VALUE_ELEMENT_NAME, term_id, config_ptr))
00229             throw MissingTransformField("Missing Source-Term in TransformationAssignTerm");
00230         Vocabulary::TermRef term_ref = v.findTerm(term_id);
00231         if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00232             throw MissingTransformField("Invalid Source-Term in TransformationAssignTerm");
00233         m_src_term = v[term_ref];
00234     }
00235 
00244     virtual bool transform(EventPtr& d, const EventPtr& s)
00245     {
00246         bool AnyCopied = false;
00247         Event::ValuesRange values_range = s->equal_range(m_src_term.term_ref);
00248         for (Event::ConstIterator ec = values_range.first; ec != values_range.second; ec++) {
00249             std::string str;
00250             AnyCopied |= AssignValue(d, m_term, s->write(str, ec->value, m_src_term));
00251         }
00252         return AnyCopied;   // true, if any were copied...
00253     }
00254 };
00255 
00257 class PION_PLATFORM_API TransformURLEncode
00258     : public Transform
00259 {
00261     Vocabulary::Term            m_src_term;
00262 
00263 public:
00264 
00272     TransformURLEncode(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00273         : Transform(v, term)
00274     {
00275         // <Term>src-term</Term>
00276         std::string term_id;
00277         if (! ConfigManager::getConfigOption(VALUE_ELEMENT_NAME, term_id, config_ptr))
00278             throw MissingTransformField("Missing Source-Term in TransformationURLEncode");
00279         Vocabulary::TermRef term_ref = v.findTerm(term_id);
00280         if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00281             throw MissingTransformField("Invalid Source-Term in TransformationURLEncode");
00282         m_src_term = v[term_ref];
00283     }
00284 
00293     virtual bool transform(EventPtr& d, const EventPtr& s)
00294     {
00295         bool AnyCopied = false;
00296         Event::ValuesRange values_range = s->equal_range(m_src_term.term_ref);
00297         for (Event::ConstIterator ec = values_range.first; ec != values_range.second; ec++) {
00298             std::string str;
00299             AnyCopied |= AssignValue(d, m_term, algo::url_encode(s->write(str, ec->value, m_src_term)));
00300         }
00301         return AnyCopied;   // true, if any were copied...
00302     }
00303 };
00304 
00306 class PION_PLATFORM_API TransformURLDecode
00307     : public Transform
00308 {
00310     Vocabulary::Term            m_src_term;
00311 
00312 public:
00313 
00321     TransformURLDecode(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00322         : Transform(v, term)
00323     {
00324         // <Term>src-term</Term>
00325         std::string term_id;
00326         if (! ConfigManager::getConfigOption(VALUE_ELEMENT_NAME, term_id, config_ptr))
00327             throw MissingTransformField("Missing Source-Term in TransformationURLDecode");
00328         Vocabulary::TermRef term_ref = v.findTerm(term_id);
00329         if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00330             throw MissingTransformField("Invalid Source-Term in TransformationURLDecode");
00331         m_src_term = v[term_ref];
00332     }
00333 
00342     virtual bool transform(EventPtr& d, const EventPtr& s)
00343     {
00344         bool AnyCopied = false;
00345         Event::ValuesRange values_range = s->equal_range(m_src_term.term_ref);
00346         for (Event::ConstIterator ec = values_range.first; ec != values_range.second; ec++) {
00347             std::string str;
00348             AnyCopied |= AssignValue(d, m_term, algo::url_decode(s->write(str, ec->value, m_src_term)));
00349         }
00350         return AnyCopied;   // true, if any were copied...
00351     }
00352 };
00353 
00354 
00356 class PION_PLATFORM_API TransformSplitTerm
00357     : public Transform
00358 {
00360     Vocabulary::Term            m_src_term;
00361 
00363     boost::char_separator<char> m_sep;
00364 
00365 public:
00366 
00374     TransformSplitTerm(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00375         : Transform(v, term)
00376     {
00377         // <Term>src-term</Term>
00378         std::string term_id;
00379         if (! ConfigManager::getConfigOption(VALUE_ELEMENT_NAME, term_id, config_ptr))
00380             throw MissingTransformField("Missing Source-Term in TransformationSplitTerm");
00381         Vocabulary::TermRef term_ref = v.findTerm(term_id);
00382         if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00383             throw MissingTransformField("Invalid Source-Term in TransformationSplitTerm");
00384         m_src_term = v[term_ref];
00385         std::string separator = ConfigManager::getAttribute(SEP_ATTRIBUTE_NAME, ConfigManager::findConfigNodeByName(VALUE_ELEMENT_NAME, config_ptr));
00386         if (separator.empty())
00387             throw MissingTransformField("Missing separator value in TransformationSplitTerm");
00388         m_sep = boost::char_separator<char>(separator.c_str());
00389     }
00390 
00399     virtual bool transform(EventPtr& d, const EventPtr& s)
00400     {
00401         bool AnyCopied = false;
00402         typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
00403         Event::ValuesRange values_range = s->equal_range(m_src_term.term_ref);
00404         std::string str;
00405         for (Event::ConstIterator ec = values_range.first; ec != values_range.second; ec++) {
00406             tokenizer str_tok(s->write(str, ec->value, m_src_term), m_sep);
00407             for (tokenizer::iterator tok_iter = str_tok.begin(); tok_iter != str_tok.end(); ++tok_iter)
00408                 AnyCopied |= AssignValue(d, m_term, *tok_iter);
00409         }
00410         return AnyCopied;   // true, if any were copied...
00411     }
00412 };
00413 
00415 class PION_PLATFORM_API TransformJoinTerm
00416     : public Transform
00417 {
00419     Vocabulary::Term            m_src_term;
00420 
00422     std::string                 m_separator;
00423 
00425     bool                        m_unique;
00426 
00427 public:
00428 
00436     TransformJoinTerm(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00437         : Transform(v, term), m_unique(false)
00438     {
00439         // <Term>src-term</Term>
00440         std::string term_id;
00441         if (! ConfigManager::getConfigOption(VALUE_ELEMENT_NAME, term_id, config_ptr))
00442             throw MissingTransformField("Missing Source-Term in TransformationJoinTerm");
00443         Vocabulary::TermRef term_ref = v.findTerm(term_id);
00444         if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00445             throw MissingTransformField("Invalid Source-Term in TransformationJoinTerm");
00446         m_src_term = v[term_ref];
00447         m_separator = ConfigManager::getAttribute(SEP_ATTRIBUTE_NAME, ConfigManager::findConfigNodeByName(VALUE_ELEMENT_NAME, config_ptr));
00448         if (m_separator.empty())
00449             throw MissingTransformField("Missing separator value in TransformationSplitTerm");
00450         std::string uniq = ConfigManager::getAttribute(UNIQ_ATTRIBUTE_NAME, ConfigManager::findConfigNodeByName(VALUE_ELEMENT_NAME, config_ptr));
00451         if (uniq == "true")
00452             m_unique = true;
00453     }
00454 
00463     virtual bool transform(EventPtr& d, const EventPtr& s)
00464     {
00465         bool AnyCopied = false;
00466         std::set<std::string> seen;
00467         Event::ValuesRange values_range = s->equal_range(m_src_term.term_ref);
00468         std::string result, str;
00469         for (Event::ConstIterator ec = values_range.first; ec != values_range.second; ec++) {
00470             s->write(str, ec->value, m_src_term);
00471             if (!str.empty() && seen.find(str) == seen.end()) {
00472                 if (!result.empty())
00473                     result += m_separator;
00474                 result += str;
00475                 if (m_unique)
00476                     seen.insert(str);
00477             }
00478         }
00479         if (!result.empty())
00480             AnyCopied |= AssignValue(d, m_term, result);
00481         return AnyCopied;   // true, if any were copied...
00482     }
00483 };
00484 
00485 
00486 
00488 class PION_PLATFORM_API TransformLookup
00489     : public Transform
00490 {
00492     typedef PION_HASH_MAP<std::string, std::string, PION_HASH_STRING>   KVP;
00493 
00495     Vocabulary::Term            m_lookup_term;
00496 
00498     boost::regex                m_match;
00499 
00501     std::string                 m_format;
00502 
00504     enum { DEF_UNDEF, DEF_SRCTERM, DEF_OUTPUT, DEF_FIXED }
00505                                 m_default;
00506 
00508     std::string                 m_fixed;
00509 
00511     KVP                         m_lookup;
00512 
00514     bool                        m_running;
00515 
00516 public:
00517 
00525     TransformLookup(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00526         : Transform(v, term)
00527     {
00528         //  <LookupTerm>src-term</LookupTerm>
00529         std::string term_id;
00530         if (! ConfigManager::getConfigOption(LOOKUP_TERM_NAME, term_id, config_ptr))
00531             throw MissingTransformField("Missing LookupTerm in TransformationAssignLookup");
00532         Vocabulary::TermRef term_ref = v.findTerm(term_id);
00533         if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00534             throw MissingTransformField("Invalid LookupTerm in TransformationAssignLookup");
00535         m_lookup_term = v[term_ref];
00536         //[opt]     <Match>escape(regexp)</Match>
00537         std::string val;
00538         if (ConfigManager::getConfigOptionEmptyOk(LOOKUP_MATCH_ELEMENT_NAME, val, config_ptr)) {
00539             try {
00540                 m_match = val;
00541             } catch (...) {
00542                 throw MissingTransformField("Invalid regular expression in TransformationLookup: " + val);
00543             }
00544         }
00545         //  [opt]       <Format>escape(format)</Format>
00546         m_format.clear();
00547         if (ConfigManager::getConfigOptionEmptyOk(LOOKUP_FORMAT_ELEMENT_NAME, val, config_ptr))
00548             m_format = val;
00549 
00550         //  [opt]       <DefaultAction>leave-undefined|src-term|output|fixedvalue</DefaultAction>
00551         m_default = DEF_UNDEF;
00552         if (ConfigManager::getConfigOption(LOOKUP_DEFAULTACTION_ELEMENT_NAME, val, config_ptr)) {
00553             if (val == "src-term")
00554                 m_default = DEF_SRCTERM;
00555             else if (val == "output")
00556                 m_default = DEF_OUTPUT;
00557             else if (val == "fixedvalue")
00558                 m_default = DEF_FIXED;
00559         }
00560         //  [opt]       <DefaultValue>escape(text)</DefaultValue>
00561         m_fixed.clear();
00562         if (m_default == DEF_FIXED && ConfigManager::getConfigOptionEmptyOk(LOOKUP_DEFAULT_ELEMENT_NAME, val, config_ptr))
00563             m_fixed = val;
00564         //  [rpt/]      <Lookup key="escape(key)">escape(value)</Lookup>
00565         xmlNodePtr LookupNode = config_ptr;
00566         while ( (LookupNode = ConfigManager::findConfigNodeByName(LOOKUP_LOOKUP_ELEMENT_NAME, LookupNode)) != NULL) {
00567             // get the value (element content)
00568             xmlChar *xml_char_ptr = xmlNodeGetContent(LookupNode);
00569             if (xml_char_ptr == NULL || xml_char_ptr[0]=='\0') {
00570                 if (xml_char_ptr != NULL)
00571                     xmlFree(xml_char_ptr);
00572                 throw MissingTransformField("Missing Value in TransformationLookup");
00573             }
00574             const std::string val_str(reinterpret_cast<char*>(xml_char_ptr));
00575             xmlFree(xml_char_ptr);
00576             // next get the Term we want to map to
00577             xml_char_ptr = xmlGetProp(LookupNode, reinterpret_cast<const xmlChar*>(LOOKUP_KEY_ATTRIBUTE_NAME.c_str()));
00578             if (xml_char_ptr == NULL || xml_char_ptr[0]=='\0') {
00579                 if (xml_char_ptr != NULL)
00580                     xmlFree(xml_char_ptr);
00581                 throw MissingTransformField("Missing Key in TransformationLookup");
00582             }
00583             const std::string key_str(reinterpret_cast<char*>(xml_char_ptr));
00584             xmlFree(xml_char_ptr);
00585             if (m_lookup.find(key_str) != m_lookup.end())
00586                 throw MissingTransformField("Duplicate Key in TransformationLookup");
00587             m_lookup[key_str] = val_str;
00588             LookupNode = LookupNode->next;
00589         }
00590         if (m_lookup.empty())
00591             throw MissingTransformField("No Key-Values in TransformationLookup");
00592 
00593         m_running = true;
00594     }
00595 
00597     virtual ~TransformLookup() {
00598         m_lookup.clear();
00599     }
00600 
00609     virtual bool transform(EventPtr& d, const EventPtr& s)
00610     {
00611         if (!m_running)
00612             return false;
00613         Event::ValuesRange values_range = s->equal_range(m_lookup_term.term_ref);
00614         Event::ConstIterator ec = values_range.first;
00615         // if ec == values_range.second ... source term was not found...
00616         bool AnyCopied = false;
00617         while (ec != values_range.second) {
00618             // Get the source term
00619             std::string str;
00620             s->write(str, ec->value, m_lookup_term);
00621             // If regex defined, do the regular expression, replacing the key value
00622             if (! m_match.empty()) {
00623                 try {
00624                     str = boost::regex_replace(str, m_match, m_format, boost::format_all | boost::format_no_copy);
00625                 } catch (...) {
00626                     // Get the source string again
00627                     str.clear();
00628                     s->write(str, ec->value, m_lookup_term);
00629                     // Not running anymore
00630                     m_running = false;
00631                     // Throw on this, to get an error message logged
00632                     throw RegexFailure("str=" + str + ", regex=" + m_match.str());
00633                 }
00634             }
00635             // Find the value, using the key
00636             KVP::const_iterator i = m_lookup.find(str);
00637             if (i != m_lookup.end())    // Found: assign the lookup value
00638                 AnyCopied |= AssignValue(d, m_term, i->second);
00639             else                        // Not found: perform default action
00640                 switch (m_default) {
00641                     case DEF_UNDEF:     // Leave undefined, i.e. do nothing
00642                         break;
00643                     case DEF_SRCTERM:   // Re-get the original value, assign it
00644                         {
00645                             std::string str;
00646                             AnyCopied |= AssignValue(d, m_term, s->write(str, ec->value, m_lookup_term));
00647                         }
00648                         break;
00649                     case DEF_OUTPUT:    // Assign the regex output value
00650                         AnyCopied |= AssignValue(d, m_term, str);
00651                         break;
00652                     case DEF_FIXED:     // Assign the fixed value
00653                         AnyCopied |= AssignValue(d, m_term, m_fixed);
00654                         break;
00655                 }
00656             ec++;           // repeat for all matching source terms
00657         }
00658         return AnyCopied;   // true, if any were copied...
00659     }
00660 };
00661 
00663 class PION_PLATFORM_API TransformRules
00664     : public Transform
00665 {
00667     bool                                        m_short_circuit;
00668 
00670     std::vector<std::string>                    m_set_value;
00671 
00673     std::vector<Comparison *>                   m_comparison;
00674 
00676     std::vector<bool>                           m_running;
00677 
00678 public:
00679 
00687     TransformRules(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00688         : Transform(v, term)
00689     {
00690         // <StopOnFirstMatch>true|false</StopOnFirstMatch>          -> DEFAULT: true
00691         m_short_circuit = false;
00692         std::string short_circuit_str;
00693         if (! ConfigManager::getConfigOption(RULES_STOP_ON_FIRST_ELEMENT_NAME, short_circuit_str, config_ptr))
00694             throw MissingTransformField("Missing StopOnFirstMatch in TransformationAssignRules");
00695         if (short_circuit_str == "true")
00696             m_short_circuit = true;
00697 
00698         //  [rpt]       <Rule>
00699         xmlNodePtr RuleNode = config_ptr;
00700         while ( (RuleNode = ConfigManager::findConfigNodeByName(RULE_ELEMENT_NAME, RuleNode)) != NULL)
00701         {
00702             //  <Term>src-term</Term>
00703             std::string term_id;
00704             if (! ConfigManager::getConfigOption(TERM_ELEMENT_NAME, term_id, RuleNode->children))
00705                 throw MissingTransformField("Missing Source-Term in TransformationAssignRules");
00706             Vocabulary::TermRef term_ref = v.findTerm(term_id);
00707             if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00708                 throw MissingTransformField("Invalid Term in TransformationAssignRules");
00709 
00710             //  <Type>test-type</Type>
00711             std::string val;
00712             if (! ConfigManager::getConfigOption(TYPE_ELEMENT_NAME, val, RuleNode->children))
00713                 throw MissingTransformField("Missing Value in TransformationAssignRules");
00714             Comparison::ComparisonType ctype = Comparison::parseComparisonType(val);
00715 
00716             //  <Value>escape(test-value)</Value>
00717             std::string value_str;
00718             if (Comparison::requiresValue(ctype))
00719                 if (! ConfigManager::getConfigOptionEmptyOk(VALUE_ELEMENT_NAME, value_str, RuleNode->children))
00720                     throw MissingTransformField("Missing Value in TransformationAssignRules");
00721 
00722             Comparison *comp = new Comparison(v[term_ref]);
00723             comp->configure(ctype, value_str);
00724             m_comparison.push_back(comp);
00725 
00726             //  <SetValue>escape(set-value)</SetValue>
00727             val.clear();
00728             if (! ConfigManager::getConfigOptionEmptyOk(TRANSFORMATION_SET_VALUE_NAME, val, RuleNode->children))
00729                 throw MissingTransformField("Missing SetValue in TransformationAssignRules");
00730             m_set_value.push_back(val);
00731 
00732             // Set running state
00733             m_running.push_back(true);
00734 
00735             RuleNode = RuleNode->next;
00736         }
00737     }
00738 
00740     virtual ~TransformRules() {
00741         for (unsigned int i = 0; i < m_comparison.size(); i++)
00742             delete m_comparison[i];
00743     }
00744 
00753     virtual bool transform(EventPtr& d, const EventPtr& s)
00754     {
00755         bool AnyAssigned = false;
00756         // Loop through all TESTs, break out if any term successful on any test and short_circuit
00757         for (unsigned int i = 0; i < m_comparison.size(); i++)
00758             if (m_running[i]) {
00759                 switch (m_comparison[i]->getType()) {
00760                     // We'll take out the cases where there might not be values to iterate through, and handle them individually.
00761                     case Comparison::TYPE_IS_DEFINED:
00762                         if (s->getType() == m_comparison[i]->getTerm().term_ref || s->isDefined(m_comparison[i]->getTerm().term_ref))
00763                             AnyAssigned |= AssignValue(d, m_term, m_set_value[i]);
00764                         break;
00765                     case Comparison::TYPE_TRUE:
00766                         AnyAssigned |= AssignValue(d, m_term, m_set_value[i]);
00767                         break;
00768                     case Comparison::TYPE_FALSE:
00769                         break;
00770                     case Comparison::TYPE_IS_NOT_DEFINED:
00771                         if (! (s->getType() == m_comparison[i]->getTerm().term_ref || s->isDefined(m_comparison[i]->getTerm().term_ref)))
00772                             AnyAssigned |= AssignValue(d, m_term, m_set_value[i]);
00773                         break;
00774                     default:
00775                         {
00776                             Event::ValuesRange values_range = s->equal_range(m_comparison[i]->getTerm().term_ref);
00777                             for (Event::ConstIterator ec = values_range.first; ec != values_range.second; ec++)
00778                                 try {
00779                                     Event::ConstIterator ec_past = ec;
00780                                     if (m_comparison[i]->evaluateRange(std::make_pair(ec, ++ec_past))) {
00781                                         if (m_comparison[i]->getType() == Comparison::TYPE_REGEX) {     // Only for POSITIVE regex...
00782                                             // Get the original value
00783                                             // For Regex... get the precompiled from Comparison
00784                                             // For Format... use the set_value
00785                                             std::string str;
00786                                             str = boost::u32regex_replace(s->write(str, ec->value, m_comparison[i]->getTerm()), m_comparison[i]->getRegex(),
00787                                                                         m_set_value[i], boost::format_all | boost::format_no_copy);
00788                                             // Assign the result
00789                                             AnyAssigned |= AssignValue(d, m_term, str);
00790                                         } else
00791                                             AnyAssigned |= AssignValue(d, m_term, m_set_value[i]);
00792                                     }
00793                                 } catch (...) {
00794                                     // Get the original value again...
00795                                     std::string str;
00796                                     // This rule won't be running again...
00797                                     m_running[i] = false;
00798                                     // Throw on this, to get an error message logged
00799                                     throw RegexFailure("str=" + s->write(str, ec->value, m_comparison[i]->getTerm()) + ", regex=" + m_comparison[i]->getRegexStr());
00800                                 }
00801                         }
00802                         break;
00803                 }
00804                 // If short_circuit AND any values were assigned -> don't go further in the chain
00805                 if (m_short_circuit && AnyAssigned)
00806                     break;
00807             }
00808         return AnyAssigned;
00809     }
00810 };
00811 
00813 class PION_PLATFORM_API TransformRegex
00814     : public Transform
00815 {
00817     Vocabulary::Term                            m_src_term;
00818 
00820     std::vector<std::string>                    m_format;
00821 
00823     std::vector<boost::u32regex>                m_regex;
00824 
00826     std::vector<std::string>                    m_regex_str;
00827 
00829     std::vector<bool>                           m_running;
00830 
00831 public:
00832 
00840     TransformRegex(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00841         : Transform(v, term)
00842     {
00843         //  <SourceTerm>src-term</SourceTerm>
00844         std::string term_id;
00845         if (! ConfigManager::getConfigOption(SOURCE_TERM_ELEMENT_NAME, term_id, config_ptr))
00846             throw MissingTransformField("Missing SourceTerm in TransformationRegex");
00847         Vocabulary::TermRef term_ref = v.findTerm(term_id);
00848         if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00849             throw MissingTransformField("Invalid SourceTerm in TransformationRegex");
00850         m_src_term = v[term_ref];
00851         xmlNodePtr RegexNode = config_ptr;
00852         while ( (RegexNode = ConfigManager::findConfigNodeByName(REGEXP_ELEMENT_NAME, RegexNode)) != NULL) {
00853             // get the FORMAT (element content)
00854             xmlChar *xml_char_ptr = xmlNodeGetContent(RegexNode);
00855             std::string val;
00856             if (xml_char_ptr != NULL && xml_char_ptr[0] != '\0')
00857                 val = reinterpret_cast<char*>(xml_char_ptr);
00858             if (xml_char_ptr != NULL) xmlFree(xml_char_ptr);
00859             m_format.push_back(val);
00860             // next get the Term we want to map to
00861             xml_char_ptr = xmlGetProp(RegexNode, reinterpret_cast<const xmlChar*>(REGEXP_ATTRIBUTE_NAME.c_str()));
00862             if (xml_char_ptr == NULL || xml_char_ptr[0]=='\0') {
00863                 if (xml_char_ptr != NULL)
00864                     xmlFree(xml_char_ptr);
00865                 throw MissingTransformField("Missing Regexp in TransformationRegex");
00866             }
00867             val = reinterpret_cast<char*>(xml_char_ptr);
00868             xmlFree(xml_char_ptr);
00869             boost::u32regex reg;
00870             try {
00871                 reg = boost::make_u32regex(val);
00872             } catch (...) {
00873                 throw MissingTransformField("Invalid regular expression in TransformationRegex");
00874             }
00875             m_regex.push_back(reg);
00876             m_regex_str.push_back(val);
00877             m_running.push_back(true);
00878             RegexNode = RegexNode->next;
00879         }
00880         if (m_regex.empty())
00881             throw MissingTransformField("No Regexp's in TransformationRegex");
00882     }
00883 
    /// virtual destructor; the body is empty, so only the members' own destructors run
    virtual ~TransformRegex() { }
00886 
00895     virtual bool transform(EventPtr& d, const EventPtr& s)
00896     {
00897         bool AnyAssigned = false;
00898         // Iterate through all values from source term
00899         Event::ValuesRange values_range = s->equal_range(m_src_term.term_ref);
00900         for (Event::ConstIterator ec = values_range.first; ec != values_range.second; ec++) {
00901             // Take the original value from source term set
00902             std::string str;
00903             s->write(str, ec->value, m_src_term);
00904             // Run through all regexp's
00905             for (unsigned int i = 0; i < m_regex.size(); i++)
00906                 if (m_running[i]) {
00907                     std::string res;
00908                     try {
00909                         res = boost::u32regex_replace(str, m_regex[i], m_format[i], boost::format_all | boost::format_no_copy);
00910                     } catch (...) {
00911                         // This rule won't be running again...
00912                         m_running[i] = false;
00913                         // Throw on this, to get an error message logged
00914                         throw RegexFailure("str=" + str + ", regex=" + m_regex_str[i]);
00915                     }
00916                     if (!res.empty())
00917                         str = res;
00918                 }
00919             AnyAssigned |= AssignValue(d, m_term, str);
00920         }
00921         return AnyAssigned;
00922     }
00923 };
00924 
00925 
00939 template <class IteratorType>
00940 inline bool HideCreditCardNumbers(IteratorType first, IteratorType last)
00941 {
00942     // static regular expressions used to find and verify credit card numbers
00943     //
00944     // Visa: starts with "4", has 16 digits in blocks of 4
00945     // 4(?:[\s+-]?\d){15}
00946     //
00947     // MasterCard: starts with 51-55, has 16 digits in blocks of 4
00948     // 5[1-5](?:[\s+-]?\d){14}
00949     //
00950     // Amex: starts with 34 or 37, has 15 digits as 4-6-5
00951     // (?:34|37)(?:[\s+-]?\d){13}
00952     //
00953     // Discover: starts with 6011 or 65, has 16 digits in blocks of 4
00954     // (?:6011|65\d\d)(?:[\s+-]?\d){12}
00955     //
00956     // Diners: starts with 36, 38 or 300-305, all have 14 digits (blocks?)
00957     // (?:30[0-5]|36\d|38\d)(?:[\s+-]?\d){11}
00958     //
00959     // JCB: starts with 35, has 16 digits (blocks?)
00960     // 35(?:[\s+-]?\d){14}
00961     //
00962     // combined:
00963     // 4(?:[\s+-]?\d){15}|5[1-5](?:[\s+-]?\d){14}|(?:34|37)(?:[\s+-]?\d){13}|(?:6011|65\d\d)(?:[\s+-]?\d){12}|(?:30[0-5]|36\d|38\d)(?:[\s+-]?\d){11}|35(?:[\s+-]?\d){14}
00964     //
00965     // resources:
00966     // http://en.wikipedia.org/wiki/Credit_card_number
00967     // http://en.wikipedia.org/wiki/List_of_Bank_Identification_Numbers
00968     // http://www.regular-expressions.info/creditcard.html
00969     // 
00970     static const boost::regex FIND_CC_NUMBER_RX("\\b(?:4(?:[\\s+-]?\\d){15}|5[1-5](?:[\\s+-]?\\d){14}|(?:34|37)(?:[\\s+-]?\\d){13}|(?:6011|65\\d\\d)(?:[\\s+-]?\\d){12}|(?:30[0-5]|36\\d|38\\d)(?:[\\s+-]?\\d){11}|35(?:[\\s+-]?\\d){14})\\b");
00971 
00972     // variable used to store regex match information
00973     boost::match_results<IteratorType> results;
00974     
00975     // keeps track of whether or not we found a match
00976     bool found_match = false;
00977 
00978     // loop through the string looking for each possible match
00979     while (boost::regex_search(first, last, results, FIND_CC_NUMBER_RX)) {
00980         // basic verification succeeded
00981         found_match = true;
00982         
00983         // replace match string with X's
00984         for (IteratorType tmp_it = results[0].first; tmp_it != results[0].second; ++tmp_it) {
00985             *tmp_it = 'X';
00986         }
00987 
00988         // start searching for more cc numbers after end of current match
00989         // (no need to re-check earlier bytes)
00990         first = results[0].second;
00991     }
00992     
00993     return found_match;
00994 }
00995 
00996 
01007 inline bool HideCreditCardNumbers(Event& e, const Vocabulary::TermRef& term_ref)
01008 {
01009     // keeps track of whether or not we found a match
01010     bool found_match = false;
01011 
01012     // get range of matching parameters within the event & iterate
01013     Event::ValuesRange values_range = e.equal_range(term_ref);
01014     for (Event::ConstIterator it = values_range.first; it != values_range.second; it++) {
01015         const Event::BlobType& b(boost::get<const Event::BlobType&>(it->value));
01016         char *first = const_cast<char*>(b.get());
01017         char *last = first + b.size();
01018         if (HideCreditCardNumbers(first, last)) {
01019             found_match = true;
01020         }
01021     }
01022 
01023     return found_match;
01024 }
01025 
01026 
01034 inline bool HideCreditCardNumbers(Event& e)
01035 {
01036     // keeps track of whether or not we found a match
01037     bool found_match = false;
01038 
01039     // get range of matching parameters within the event & iterate
01040     for (Event::ConstIterator it = e.begin(); it != e.end(); it++) {
01041         if (boost::get<const Event::BlobType&>(& it->value)) {      // make sure it's BlobType
01042             const Event::BlobType& b(boost::get<const Event::BlobType&>(it->value));
01043             char *first = const_cast<char*>(b.get());
01044             char *last = first + b.size();
01045             if (HideCreditCardNumbers(first, last)) {
01046                 found_match = true;
01047             }
01048         }
01049     }
01050 
01051     return found_match;
01052 }
01053 
01054 
01055 }   // end namespace platform
01056 }   // end namespace pion
01057 
01058 
01059 #endif

Generated on Wed Apr 13 16:38:34 2011 for pion-platform by  doxygen 1.4.7