00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef __PION_TRANSFORM_HEADER__
00021 #define __PION_TRANSFORM_HEADER__
00022
00023 #include <cctype>
00024 #include <cstring>
00025 #include <set>
00026 #include <boost/regex.hpp>
00027 #include <boost/regex/icu.hpp>
00028 #include <boost/algorithm/string/compare.hpp>
00029 #include <boost/algorithm/string/predicate.hpp>
00030 #include <boost/tokenizer.hpp>
00031 #include <pion/PionAlgorithms.hpp>
00032 #include <pion/PionConfig.hpp>
00033 #include <pion/PionException.hpp>
00034 #include <pion/platform/Vocabulary.hpp>
00035 #include <pion/platform/Event.hpp>
00036 #include <pion/platform/Comparison.hpp>
00037 #include <pion/PionLogger.hpp>
00038 #include <pion/platform/ConfigManager.hpp>
00039
00040 namespace pion {
00041 namespace platform {
00042
00043 class PION_PLATFORM_API Transform
00044 {
00045 public:
00047 Vocabulary::Term m_term;
00048
00050 class MissingTransformField : public PionException {
00051 public:
00052 MissingTransformField(const std::string& str)
00053 : PionException("Invalid type of transformation: ", str) {}
00054 };
00055
00057 class RegexFailure : public PionException {
00058 public:
00059 RegexFailure(const std::string& what)
00060 : PionException("Regex replace failed: ", what) {}
00061 };
00062
00064 class ValueAssignmentException : public PionException {
00065 public:
00066 ValueAssignmentException(const std::string& value)
00067 : PionException("AssignValue failed for value: ", value) {}
00068 };
00069
00070
00071
00073 virtual ~Transform() {}
00074
00081 Transform(const Vocabulary& v, const Vocabulary::Term& term)
00082 : m_term(term)
00083 {
00084 }
00085
00091 void updateVocabulary(const Vocabulary& v)
00092 {
00093
00094 m_term = v[m_term.term_ref];
00095 }
00096
00102 inline void removeTerm(EventPtr& e)
00103 {
00104 e->clear(m_term.term_ref);
00105 }
00106
00114 virtual bool transform(EventPtr& d, const EventPtr& s) = 0;
00115
00117 static const std::string LOOKUP_TERM_NAME;
00118 static const std::string TERM_ELEMENT_NAME;
00119 static const std::string LOOKUP_MATCH_ELEMENT_NAME;
00120 static const std::string LOOKUP_FORMAT_ELEMENT_NAME;
00121 static const std::string LOOKUP_DEFAULT_ELEMENT_NAME;
00122 static const std::string VALUE_ELEMENT_NAME;
00123 static const std::string RULES_STOP_ON_FIRST_ELEMENT_NAME;
00124 static const std::string RULE_ELEMENT_NAME;
00125 static const std::string TYPE_ELEMENT_NAME;
00126 static const std::string TRANSFORMATION_SET_VALUE_NAME;
00127 static const std::string LOOKUP_DEFAULTACTION_ELEMENT_NAME;
00128 static const std::string LOOKUP_LOOKUP_ELEMENT_NAME;
00129 static const std::string LOOKUP_KEY_ATTRIBUTE_NAME;
00130 static const std::string SOURCE_TERM_ELEMENT_NAME;
00131 static const std::string REGEXP_ELEMENT_NAME;
00132 static const std::string REGEXP_ATTRIBUTE_NAME;
00133 static const std::string SEP_ATTRIBUTE_NAME;
00134 static const std::string UNIQ_ATTRIBUTE_NAME;
00135 };
00136
00146 inline bool AssignValue(EventPtr& e, const Vocabulary::Term& term, const std::string& value)
00147 {
00148
00149
00150
00151
00152 try {
00153 e->set(term, value);
00154 } catch (...) {
00155 throw Transform::ValueAssignmentException(value);
00156 }
00157 return true;
00158 }
00159
00166 class PION_PLATFORM_API TransformAssignValue
00167 : public Transform
00168 {
00170 std::string m_tr_set_value;
00171
00172 public:
00173
00181 TransformAssignValue(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00182 : Transform(v, term)
00183 {
00184
00185 std::string val;
00186 if (! ConfigManager::getConfigOptionEmptyOk(VALUE_ELEMENT_NAME, val, config_ptr))
00187 throw MissingTransformField("Missing Value in TransformationAssignValue");
00188 m_tr_set_value = val;
00189 }
00190
00199 virtual bool transform(EventPtr& d, const EventPtr& s)
00200 {
00201 return AssignValue(d, m_term, m_tr_set_value);
00202 }
00203 };
00204
00205
00208 class PION_PLATFORM_API TransformAssignTerm
00209 : public Transform
00210 {
00212 Vocabulary::Term m_src_term;
00213
00214 public:
00215
00223 TransformAssignTerm(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00224 : Transform(v, term)
00225 {
00226
00227 std::string term_id;
00228 if (! ConfigManager::getConfigOption(VALUE_ELEMENT_NAME, term_id, config_ptr))
00229 throw MissingTransformField("Missing Source-Term in TransformationAssignTerm");
00230 Vocabulary::TermRef term_ref = v.findTerm(term_id);
00231 if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00232 throw MissingTransformField("Invalid Source-Term in TransformationAssignTerm");
00233 m_src_term = v[term_ref];
00234 }
00235
00244 virtual bool transform(EventPtr& d, const EventPtr& s)
00245 {
00246 bool AnyCopied = false;
00247 Event::ValuesRange values_range = s->equal_range(m_src_term.term_ref);
00248 for (Event::ConstIterator ec = values_range.first; ec != values_range.second; ec++) {
00249 std::string str;
00250 AnyCopied |= AssignValue(d, m_term, s->write(str, ec->value, m_src_term));
00251 }
00252 return AnyCopied;
00253 }
00254 };
00255
00257 class PION_PLATFORM_API TransformURLEncode
00258 : public Transform
00259 {
00261 Vocabulary::Term m_src_term;
00262
00263 public:
00264
00272 TransformURLEncode(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00273 : Transform(v, term)
00274 {
00275
00276 std::string term_id;
00277 if (! ConfigManager::getConfigOption(VALUE_ELEMENT_NAME, term_id, config_ptr))
00278 throw MissingTransformField("Missing Source-Term in TransformationURLEncode");
00279 Vocabulary::TermRef term_ref = v.findTerm(term_id);
00280 if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00281 throw MissingTransformField("Invalid Source-Term in TransformationURLEncode");
00282 m_src_term = v[term_ref];
00283 }
00284
00293 virtual bool transform(EventPtr& d, const EventPtr& s)
00294 {
00295 bool AnyCopied = false;
00296 Event::ValuesRange values_range = s->equal_range(m_src_term.term_ref);
00297 for (Event::ConstIterator ec = values_range.first; ec != values_range.second; ec++) {
00298 std::string str;
00299 AnyCopied |= AssignValue(d, m_term, algo::url_encode(s->write(str, ec->value, m_src_term)));
00300 }
00301 return AnyCopied;
00302 }
00303 };
00304
00306 class PION_PLATFORM_API TransformURLDecode
00307 : public Transform
00308 {
00310 Vocabulary::Term m_src_term;
00311
00312 public:
00313
00321 TransformURLDecode(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00322 : Transform(v, term)
00323 {
00324
00325 std::string term_id;
00326 if (! ConfigManager::getConfigOption(VALUE_ELEMENT_NAME, term_id, config_ptr))
00327 throw MissingTransformField("Missing Source-Term in TransformationURLDecode");
00328 Vocabulary::TermRef term_ref = v.findTerm(term_id);
00329 if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00330 throw MissingTransformField("Invalid Source-Term in TransformationURLDecode");
00331 m_src_term = v[term_ref];
00332 }
00333
00342 virtual bool transform(EventPtr& d, const EventPtr& s)
00343 {
00344 bool AnyCopied = false;
00345 Event::ValuesRange values_range = s->equal_range(m_src_term.term_ref);
00346 for (Event::ConstIterator ec = values_range.first; ec != values_range.second; ec++) {
00347 std::string str;
00348 AnyCopied |= AssignValue(d, m_term, algo::url_decode(s->write(str, ec->value, m_src_term)));
00349 }
00350 return AnyCopied;
00351 }
00352 };
00353
00354
00356 class PION_PLATFORM_API TransformSplitTerm
00357 : public Transform
00358 {
00360 Vocabulary::Term m_src_term;
00361
00363 boost::char_separator<char> m_sep;
00364
00365 public:
00366
00374 TransformSplitTerm(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00375 : Transform(v, term)
00376 {
00377
00378 std::string term_id;
00379 if (! ConfigManager::getConfigOption(VALUE_ELEMENT_NAME, term_id, config_ptr))
00380 throw MissingTransformField("Missing Source-Term in TransformationSplitTerm");
00381 Vocabulary::TermRef term_ref = v.findTerm(term_id);
00382 if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00383 throw MissingTransformField("Invalid Source-Term in TransformationSplitTerm");
00384 m_src_term = v[term_ref];
00385 std::string separator = ConfigManager::getAttribute(SEP_ATTRIBUTE_NAME, ConfigManager::findConfigNodeByName(VALUE_ELEMENT_NAME, config_ptr));
00386 if (separator.empty())
00387 throw MissingTransformField("Missing separator value in TransformationSplitTerm");
00388 m_sep = boost::char_separator<char>(separator.c_str());
00389 }
00390
00399 virtual bool transform(EventPtr& d, const EventPtr& s)
00400 {
00401 bool AnyCopied = false;
00402 typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
00403 Event::ValuesRange values_range = s->equal_range(m_src_term.term_ref);
00404 std::string str;
00405 for (Event::ConstIterator ec = values_range.first; ec != values_range.second; ec++) {
00406 tokenizer str_tok(s->write(str, ec->value, m_src_term), m_sep);
00407 for (tokenizer::iterator tok_iter = str_tok.begin(); tok_iter != str_tok.end(); ++tok_iter)
00408 AnyCopied |= AssignValue(d, m_term, *tok_iter);
00409 }
00410 return AnyCopied;
00411 }
00412 };
00413
00415 class PION_PLATFORM_API TransformJoinTerm
00416 : public Transform
00417 {
00419 Vocabulary::Term m_src_term;
00420
00422 std::string m_separator;
00423
00425 bool m_unique;
00426
00427 public:
00428
00436 TransformJoinTerm(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00437 : Transform(v, term), m_unique(false)
00438 {
00439
00440 std::string term_id;
00441 if (! ConfigManager::getConfigOption(VALUE_ELEMENT_NAME, term_id, config_ptr))
00442 throw MissingTransformField("Missing Source-Term in TransformationJoinTerm");
00443 Vocabulary::TermRef term_ref = v.findTerm(term_id);
00444 if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00445 throw MissingTransformField("Invalid Source-Term in TransformationJoinTerm");
00446 m_src_term = v[term_ref];
00447 m_separator = ConfigManager::getAttribute(SEP_ATTRIBUTE_NAME, ConfigManager::findConfigNodeByName(VALUE_ELEMENT_NAME, config_ptr));
00448 if (m_separator.empty())
00449 throw MissingTransformField("Missing separator value in TransformationSplitTerm");
00450 std::string uniq = ConfigManager::getAttribute(UNIQ_ATTRIBUTE_NAME, ConfigManager::findConfigNodeByName(VALUE_ELEMENT_NAME, config_ptr));
00451 if (uniq == "true")
00452 m_unique = true;
00453 }
00454
00463 virtual bool transform(EventPtr& d, const EventPtr& s)
00464 {
00465 bool AnyCopied = false;
00466 std::set<std::string> seen;
00467 Event::ValuesRange values_range = s->equal_range(m_src_term.term_ref);
00468 std::string result, str;
00469 for (Event::ConstIterator ec = values_range.first; ec != values_range.second; ec++) {
00470 s->write(str, ec->value, m_src_term);
00471 if (!str.empty() && seen.find(str) == seen.end()) {
00472 if (!result.empty())
00473 result += m_separator;
00474 result += str;
00475 if (m_unique)
00476 seen.insert(str);
00477 }
00478 }
00479 if (!result.empty())
00480 AnyCopied |= AssignValue(d, m_term, result);
00481 return AnyCopied;
00482 }
00483 };
00484
00485
00486
00488 class PION_PLATFORM_API TransformLookup
00489 : public Transform
00490 {
00492 typedef PION_HASH_MAP<std::string, std::string, PION_HASH_STRING> KVP;
00493
00495 Vocabulary::Term m_lookup_term;
00496
00498 boost::regex m_match;
00499
00501 std::string m_format;
00502
00504 enum { DEF_UNDEF, DEF_SRCTERM, DEF_OUTPUT, DEF_FIXED }
00505 m_default;
00506
00508 std::string m_fixed;
00509
00511 KVP m_lookup;
00512
00514 bool m_running;
00515
00516 public:
00517
00525 TransformLookup(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00526 : Transform(v, term)
00527 {
00528
00529 std::string term_id;
00530 if (! ConfigManager::getConfigOption(LOOKUP_TERM_NAME, term_id, config_ptr))
00531 throw MissingTransformField("Missing LookupTerm in TransformationAssignLookup");
00532 Vocabulary::TermRef term_ref = v.findTerm(term_id);
00533 if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00534 throw MissingTransformField("Invalid LookupTerm in TransformationAssignLookup");
00535 m_lookup_term = v[term_ref];
00536
00537 std::string val;
00538 if (ConfigManager::getConfigOptionEmptyOk(LOOKUP_MATCH_ELEMENT_NAME, val, config_ptr)) {
00539 try {
00540 m_match = val;
00541 } catch (...) {
00542 throw MissingTransformField("Invalid regular expression in TransformationLookup: " + val);
00543 }
00544 }
00545
00546 m_format.clear();
00547 if (ConfigManager::getConfigOptionEmptyOk(LOOKUP_FORMAT_ELEMENT_NAME, val, config_ptr))
00548 m_format = val;
00549
00550
00551 m_default = DEF_UNDEF;
00552 if (ConfigManager::getConfigOption(LOOKUP_DEFAULTACTION_ELEMENT_NAME, val, config_ptr)) {
00553 if (val == "src-term")
00554 m_default = DEF_SRCTERM;
00555 else if (val == "output")
00556 m_default = DEF_OUTPUT;
00557 else if (val == "fixedvalue")
00558 m_default = DEF_FIXED;
00559 }
00560
00561 m_fixed.clear();
00562 if (m_default == DEF_FIXED && ConfigManager::getConfigOptionEmptyOk(LOOKUP_DEFAULT_ELEMENT_NAME, val, config_ptr))
00563 m_fixed = val;
00564
00565 xmlNodePtr LookupNode = config_ptr;
00566 while ( (LookupNode = ConfigManager::findConfigNodeByName(LOOKUP_LOOKUP_ELEMENT_NAME, LookupNode)) != NULL) {
00567
00568 xmlChar *xml_char_ptr = xmlNodeGetContent(LookupNode);
00569 if (xml_char_ptr == NULL || xml_char_ptr[0]=='\0') {
00570 if (xml_char_ptr != NULL)
00571 xmlFree(xml_char_ptr);
00572 throw MissingTransformField("Missing Value in TransformationLookup");
00573 }
00574 const std::string val_str(reinterpret_cast<char*>(xml_char_ptr));
00575 xmlFree(xml_char_ptr);
00576
00577 xml_char_ptr = xmlGetProp(LookupNode, reinterpret_cast<const xmlChar*>(LOOKUP_KEY_ATTRIBUTE_NAME.c_str()));
00578 if (xml_char_ptr == NULL || xml_char_ptr[0]=='\0') {
00579 if (xml_char_ptr != NULL)
00580 xmlFree(xml_char_ptr);
00581 throw MissingTransformField("Missing Key in TransformationLookup");
00582 }
00583 const std::string key_str(reinterpret_cast<char*>(xml_char_ptr));
00584 xmlFree(xml_char_ptr);
00585 if (m_lookup.find(key_str) != m_lookup.end())
00586 throw MissingTransformField("Duplicate Key in TransformationLookup");
00587 m_lookup[key_str] = val_str;
00588 LookupNode = LookupNode->next;
00589 }
00590 if (m_lookup.empty())
00591 throw MissingTransformField("No Key-Values in TransformationLookup");
00592
00593 m_running = true;
00594 }
00595
00597 virtual ~TransformLookup() {
00598 m_lookup.clear();
00599 }
00600
00609 virtual bool transform(EventPtr& d, const EventPtr& s)
00610 {
00611 if (!m_running)
00612 return false;
00613 Event::ValuesRange values_range = s->equal_range(m_lookup_term.term_ref);
00614 Event::ConstIterator ec = values_range.first;
00615
00616 bool AnyCopied = false;
00617 while (ec != values_range.second) {
00618
00619 std::string str;
00620 s->write(str, ec->value, m_lookup_term);
00621
00622 if (! m_match.empty()) {
00623 try {
00624 str = boost::regex_replace(str, m_match, m_format, boost::format_all | boost::format_no_copy);
00625 } catch (...) {
00626
00627 str.clear();
00628 s->write(str, ec->value, m_lookup_term);
00629
00630 m_running = false;
00631
00632 throw RegexFailure("str=" + str + ", regex=" + m_match.str());
00633 }
00634 }
00635
00636 KVP::const_iterator i = m_lookup.find(str);
00637 if (i != m_lookup.end())
00638 AnyCopied |= AssignValue(d, m_term, i->second);
00639 else
00640 switch (m_default) {
00641 case DEF_UNDEF:
00642 break;
00643 case DEF_SRCTERM:
00644 {
00645 std::string str;
00646 AnyCopied |= AssignValue(d, m_term, s->write(str, ec->value, m_lookup_term));
00647 }
00648 break;
00649 case DEF_OUTPUT:
00650 AnyCopied |= AssignValue(d, m_term, str);
00651 break;
00652 case DEF_FIXED:
00653 AnyCopied |= AssignValue(d, m_term, m_fixed);
00654 break;
00655 }
00656 ec++;
00657 }
00658 return AnyCopied;
00659 }
00660 };
00661
00663 class PION_PLATFORM_API TransformRules
00664 : public Transform
00665 {
00667 bool m_short_circuit;
00668
00670 std::vector<std::string> m_set_value;
00671
00673 std::vector<Comparison *> m_comparison;
00674
00676 std::vector<bool> m_running;
00677
00678 public:
00679
00687 TransformRules(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00688 : Transform(v, term)
00689 {
00690
00691 m_short_circuit = false;
00692 std::string short_circuit_str;
00693 if (! ConfigManager::getConfigOption(RULES_STOP_ON_FIRST_ELEMENT_NAME, short_circuit_str, config_ptr))
00694 throw MissingTransformField("Missing StopOnFirstMatch in TransformationAssignRules");
00695 if (short_circuit_str == "true")
00696 m_short_circuit = true;
00697
00698
00699 xmlNodePtr RuleNode = config_ptr;
00700 while ( (RuleNode = ConfigManager::findConfigNodeByName(RULE_ELEMENT_NAME, RuleNode)) != NULL)
00701 {
00702
00703 std::string term_id;
00704 if (! ConfigManager::getConfigOption(TERM_ELEMENT_NAME, term_id, RuleNode->children))
00705 throw MissingTransformField("Missing Source-Term in TransformationAssignRules");
00706 Vocabulary::TermRef term_ref = v.findTerm(term_id);
00707 if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00708 throw MissingTransformField("Invalid Term in TransformationAssignRules");
00709
00710
00711 std::string val;
00712 if (! ConfigManager::getConfigOption(TYPE_ELEMENT_NAME, val, RuleNode->children))
00713 throw MissingTransformField("Missing Value in TransformationAssignRules");
00714 Comparison::ComparisonType ctype = Comparison::parseComparisonType(val);
00715
00716
00717 std::string value_str;
00718 if (Comparison::requiresValue(ctype))
00719 if (! ConfigManager::getConfigOptionEmptyOk(VALUE_ELEMENT_NAME, value_str, RuleNode->children))
00720 throw MissingTransformField("Missing Value in TransformationAssignRules");
00721
00722 Comparison *comp = new Comparison(v[term_ref]);
00723 comp->configure(ctype, value_str);
00724 m_comparison.push_back(comp);
00725
00726
00727 val.clear();
00728 if (! ConfigManager::getConfigOptionEmptyOk(TRANSFORMATION_SET_VALUE_NAME, val, RuleNode->children))
00729 throw MissingTransformField("Missing SetValue in TransformationAssignRules");
00730 m_set_value.push_back(val);
00731
00732
00733 m_running.push_back(true);
00734
00735 RuleNode = RuleNode->next;
00736 }
00737 }
00738
00740 virtual ~TransformRules() {
00741 for (unsigned int i = 0; i < m_comparison.size(); i++)
00742 delete m_comparison[i];
00743 }
00744
00753 virtual bool transform(EventPtr& d, const EventPtr& s)
00754 {
00755 bool AnyAssigned = false;
00756
00757 for (unsigned int i = 0; i < m_comparison.size(); i++)
00758 if (m_running[i]) {
00759 switch (m_comparison[i]->getType()) {
00760
00761 case Comparison::TYPE_IS_DEFINED:
00762 if (s->getType() == m_comparison[i]->getTerm().term_ref || s->isDefined(m_comparison[i]->getTerm().term_ref))
00763 AnyAssigned |= AssignValue(d, m_term, m_set_value[i]);
00764 break;
00765 case Comparison::TYPE_TRUE:
00766 AnyAssigned |= AssignValue(d, m_term, m_set_value[i]);
00767 break;
00768 case Comparison::TYPE_FALSE:
00769 break;
00770 case Comparison::TYPE_IS_NOT_DEFINED:
00771 if (! (s->getType() == m_comparison[i]->getTerm().term_ref || s->isDefined(m_comparison[i]->getTerm().term_ref)))
00772 AnyAssigned |= AssignValue(d, m_term, m_set_value[i]);
00773 break;
00774 default:
00775 {
00776 Event::ValuesRange values_range = s->equal_range(m_comparison[i]->getTerm().term_ref);
00777 for (Event::ConstIterator ec = values_range.first; ec != values_range.second; ec++)
00778 try {
00779 Event::ConstIterator ec_past = ec;
00780 if (m_comparison[i]->evaluateRange(std::make_pair(ec, ++ec_past))) {
00781 if (m_comparison[i]->getType() == Comparison::TYPE_REGEX) {
00782
00783
00784
00785 std::string str;
00786 str = boost::u32regex_replace(s->write(str, ec->value, m_comparison[i]->getTerm()), m_comparison[i]->getRegex(),
00787 m_set_value[i], boost::format_all | boost::format_no_copy);
00788
00789 AnyAssigned |= AssignValue(d, m_term, str);
00790 } else
00791 AnyAssigned |= AssignValue(d, m_term, m_set_value[i]);
00792 }
00793 } catch (...) {
00794
00795 std::string str;
00796
00797 m_running[i] = false;
00798
00799 throw RegexFailure("str=" + s->write(str, ec->value, m_comparison[i]->getTerm()) + ", regex=" + m_comparison[i]->getRegexStr());
00800 }
00801 }
00802 break;
00803 }
00804
00805 if (m_short_circuit && AnyAssigned)
00806 break;
00807 }
00808 return AnyAssigned;
00809 }
00810 };
00811
00813 class PION_PLATFORM_API TransformRegex
00814 : public Transform
00815 {
00817 Vocabulary::Term m_src_term;
00818
00820 std::vector<std::string> m_format;
00821
00823 std::vector<boost::u32regex> m_regex;
00824
00826 std::vector<std::string> m_regex_str;
00827
00829 std::vector<bool> m_running;
00830
00831 public:
00832
00840 TransformRegex(const Vocabulary& v, const Vocabulary::Term& term, const xmlNodePtr config_ptr)
00841 : Transform(v, term)
00842 {
00843
00844 std::string term_id;
00845 if (! ConfigManager::getConfigOption(SOURCE_TERM_ELEMENT_NAME, term_id, config_ptr))
00846 throw MissingTransformField("Missing SourceTerm in TransformationRegex");
00847 Vocabulary::TermRef term_ref = v.findTerm(term_id);
00848 if (term_ref == Vocabulary::UNDEFINED_TERM_REF)
00849 throw MissingTransformField("Invalid SourceTerm in TransformationRegex");
00850 m_src_term = v[term_ref];
00851 xmlNodePtr RegexNode = config_ptr;
00852 while ( (RegexNode = ConfigManager::findConfigNodeByName(REGEXP_ELEMENT_NAME, RegexNode)) != NULL) {
00853
00854 xmlChar *xml_char_ptr = xmlNodeGetContent(RegexNode);
00855 std::string val;
00856 if (xml_char_ptr != NULL && xml_char_ptr[0] != '\0')
00857 val = reinterpret_cast<char*>(xml_char_ptr);
00858 if (xml_char_ptr != NULL) xmlFree(xml_char_ptr);
00859 m_format.push_back(val);
00860
00861 xml_char_ptr = xmlGetProp(RegexNode, reinterpret_cast<const xmlChar*>(REGEXP_ATTRIBUTE_NAME.c_str()));
00862 if (xml_char_ptr == NULL || xml_char_ptr[0]=='\0') {
00863 if (xml_char_ptr != NULL)
00864 xmlFree(xml_char_ptr);
00865 throw MissingTransformField("Missing Regexp in TransformationRegex");
00866 }
00867 val = reinterpret_cast<char*>(xml_char_ptr);
00868 xmlFree(xml_char_ptr);
00869 boost::u32regex reg;
00870 try {
00871 reg = boost::make_u32regex(val);
00872 } catch (...) {
00873 throw MissingTransformField("Invalid regular expression in TransformationRegex");
00874 }
00875 m_regex.push_back(reg);
00876 m_regex_str.push_back(val);
00877 m_running.push_back(true);
00878 RegexNode = RegexNode->next;
00879 }
00880 if (m_regex.empty())
00881 throw MissingTransformField("No Regexp's in TransformationRegex");
00882 }
00883
00885 virtual ~TransformRegex() { }
00886
00895 virtual bool transform(EventPtr& d, const EventPtr& s)
00896 {
00897 bool AnyAssigned = false;
00898
00899 Event::ValuesRange values_range = s->equal_range(m_src_term.term_ref);
00900 for (Event::ConstIterator ec = values_range.first; ec != values_range.second; ec++) {
00901
00902 std::string str;
00903 s->write(str, ec->value, m_src_term);
00904
00905 for (unsigned int i = 0; i < m_regex.size(); i++)
00906 if (m_running[i]) {
00907 std::string res;
00908 try {
00909 res = boost::u32regex_replace(str, m_regex[i], m_format[i], boost::format_all | boost::format_no_copy);
00910 } catch (...) {
00911
00912 m_running[i] = false;
00913
00914 throw RegexFailure("str=" + str + ", regex=" + m_regex_str[i]);
00915 }
00916 if (!res.empty())
00917 str = res;
00918 }
00919 AnyAssigned |= AssignValue(d, m_term, str);
00920 }
00921 return AnyAssigned;
00922 }
00923 };
00924
00925
00939 template <class IteratorType>
00940 inline bool HideCreditCardNumbers(IteratorType first, IteratorType last)
00941 {
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961
00962
00963
00964
00965
00966
00967
00968
00969
00970 static const boost::regex FIND_CC_NUMBER_RX("\\b(?:4(?:[\\s+-]?\\d){15}|5[1-5](?:[\\s+-]?\\d){14}|(?:34|37)(?:[\\s+-]?\\d){13}|(?:6011|65\\d\\d)(?:[\\s+-]?\\d){12}|(?:30[0-5]|36\\d|38\\d)(?:[\\s+-]?\\d){11}|35(?:[\\s+-]?\\d){14})\\b");
00971
00972
00973 boost::match_results<IteratorType> results;
00974
00975
00976 bool found_match = false;
00977
00978
00979 while (boost::regex_search(first, last, results, FIND_CC_NUMBER_RX)) {
00980
00981 found_match = true;
00982
00983
00984 for (IteratorType tmp_it = results[0].first; tmp_it != results[0].second; ++tmp_it) {
00985 *tmp_it = 'X';
00986 }
00987
00988
00989
00990 first = results[0].second;
00991 }
00992
00993 return found_match;
00994 }
00995
00996
01007 inline bool HideCreditCardNumbers(Event& e, const Vocabulary::TermRef& term_ref)
01008 {
01009
01010 bool found_match = false;
01011
01012
01013 Event::ValuesRange values_range = e.equal_range(term_ref);
01014 for (Event::ConstIterator it = values_range.first; it != values_range.second; it++) {
01015 const Event::BlobType& b(boost::get<const Event::BlobType&>(it->value));
01016 char *first = const_cast<char*>(b.get());
01017 char *last = first + b.size();
01018 if (HideCreditCardNumbers(first, last)) {
01019 found_match = true;
01020 }
01021 }
01022
01023 return found_match;
01024 }
01025
01026
01034 inline bool HideCreditCardNumbers(Event& e)
01035 {
01036
01037 bool found_match = false;
01038
01039
01040 for (Event::ConstIterator it = e.begin(); it != e.end(); it++) {
01041 if (boost::get<const Event::BlobType&>(& it->value)) {
01042 const Event::BlobType& b(boost::get<const Event::BlobType&>(it->value));
01043 char *first = const_cast<char*>(b.get());
01044 char *last = first + b.size();
01045 if (HideCreditCardNumbers(first, last)) {
01046 found_match = true;
01047 }
01048 }
01049 }
01050
01051 return found_match;
01052 }
01053
01054
01055 }
01056 }
01057
01058
01059 #endif