00001 #ifndef __PATTERN_H__ 00002 #define __PATTERN_H__ 00003 00004 #include <vector> 00005 #include <string> 00006 #include <map> 00007 00008 class Matcher; 00009 class NFANode; 00010 class NFAQuantifierNode; 00011 00968 class Pattern 00969 { 00970 friend class Matcher; 00971 friend class NFANode; 00972 friend class NFAQuantifierNode; 00973 private: 00981 Pattern(const std::string & rhs); 00982 protected: 00987 static std::map<std::string, Pattern *> compiledPatterns; 00993 static std::map<std::string, std::pair<std::string, unsigned long> > registeredPatterns; 00994 protected: 00999 std::map<NFANode*, bool> nodes; 01005 Matcher * matcher; 01009 NFANode * head; 01013 std::string pattern; 01018 bool error; 01024 int curInd; 01028 int groupCount; 01032 int nonCapGroupCount; 01036 unsigned long flags; 01037 protected: 01042 void raiseError(); 01048 NFANode * registerNode(NFANode * node); 01049 01059 std::string classUnion (std::string s1, std::string s2) const; 01069 std::string classIntersect (std::string s1, std::string s2) const; 01079 std::string classNegate (std::string s1) const; 01089 std::string classCreateRange(char low, char hi) const; 01090 01099 int getInt(int start, int end); 01111 bool quantifyCurly(int & sNum, int & eNum); 01123 NFANode * quantifyGroup(NFANode * start, NFANode * stop, const int gn); 01124 01132 NFANode * quantify(NFANode * newNode); 01139 std::string parseClass(); 01146 std::string parsePosix(); 01151 std::string parseOctal(); 01156 std::string parseHex(); 01161 NFANode * parseBackref(); 01171 std::string parseEscape(bool & inv, bool & quo); 01180 NFANode * parseRegisteredPattern(NFANode ** end); 01188 NFANode * parseBehind(const bool pos, NFANode ** end); 01193 NFANode * parseQuote(); 01202 NFANode * parse(const bool inParen = 0, const bool inOr = 0, NFANode ** end = NULL); 01203 public: 01205 const static unsigned long CASE_INSENSITIVE; 01207 const static unsigned long LITERAL; 01209 const static unsigned long DOT_MATCHES_ALL; 01213 const static unsigned long MULTILINE_MATCHING; 01217 const static unsigned long UNIX_LINE_MODE; 01219 const static int MIN_QMATCH; 01221 const static int MAX_QMATCH; 01222 public: 01235 static Pattern * compile (const std::string & pattern, 01236 const unsigned long mode = 0); 01250 static Pattern * compileAndKeep (const std::string & pattern, 01251 const unsigned long mode = 0); 01252 01272 static std::string replace (const std::string & pattern, 01273 const std::string & str, 01274 const std::string & replacementText, 01275 const unsigned long mode = 0); 01276 01300 static std::vector<std::string> split (const std::string & pattern, 01301 const std::string & str, 01302 const bool keepEmptys = 0, 01303 const unsigned long limit = 0, 01304 const unsigned long mode = 0); 01305 01324 static std::vector<std::string> findAll (const std::string & pattern, 01325 const std::string & str, 01326 const unsigned long mode = 0); 01327 01337 static bool matches (const std::string & pattern, 01338 const std::string & str, 01339 const unsigned long mode = 0); 01340 01360 static bool registerPattern(const std::string & name, 01361 const std::string & pattern, 01362 const unsigned long mode = 0); 01363 01367 static void unregisterPatterns(); 01371 static void clearPatternCache(); 01372 01394 static std::pair<std::string, int> findNthMatch (const std::string & pattern, 01395 const std::string & str, 01396 const int matchNum, 01397 const unsigned long mode = 0); 01398 public: 01402 ~Pattern(); 01403 01404 std::string replace (const std::string & str, 01405 const std::string & replacementText); 01406 std::vector<std::string> split (const std::string & str, const bool keepEmptys = 0, 01407 const unsigned long limit = 0); 01408 std::vector<std::string> findAll (const std::string & str); 01409 bool matches (const std::string & str); 01414 unsigned long getFlags () const; 01419 std::string getPattern () const; 01426 Matcher * createMatcher (const std::string & str); 01427 }; 01428 01429 class NFANode 01430 { 01431 friend class Matcher; 01432 public: 01433 NFANode * next; 01434 NFANode(); 01435 virtual ~NFANode(); 01436 virtual void findAllNodes(std::map<NFANode*, bool> & soFar); 01437 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const = 0; 01438 inline virtual bool isGroupHeadNode() const { return false; } 01439 inline virtual bool isStartOfInputNode() const { return false; } 01440 }; 01441 class NFACharNode : public NFANode 01442 { 01443 protected: 01444 char ch; 01445 public: 01446 NFACharNode(const char c); 01447 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01448 }; 01449 class NFACICharNode : public NFANode 01450 { 01451 protected: 01452 char ch; 01453 public: 01454 NFACICharNode(const char c); 01455 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01456 }; 01457 class NFAStartNode : public NFANode 01458 { 01459 public: 01460 NFAStartNode(); 01461 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01462 }; 01463 class NFAEndNode : public NFANode 01464 { 01465 public: 01466 NFAEndNode(); 01467 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01468 }; 01469 class NFAQuantifierNode : public NFANode 01470 { 01471 public: 01472 int min, max; 01473 NFANode * inner; 01474 virtual void findAllNodes(std::map<NFANode*, bool> & soFar); 01475 NFAQuantifierNode(Pattern * pat, NFANode * internal, 01476 const int minMatch = Pattern::MIN_QMATCH, 01477 const int maxMatch = Pattern::MAX_QMATCH); 01478 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01479 }; 01480 class NFAGreedyQuantifierNode : public NFAQuantifierNode 01481 { 01482 public: 01483 NFAGreedyQuantifierNode(Pattern * pat, NFANode * internal, 01484 const int minMatch = Pattern::MIN_QMATCH, 01485 const int maxMatch = Pattern::MAX_QMATCH); 01486 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01487 virtual int matchInternal(const std::string & str, Matcher * matcher, const int curInd, const int soFar) const; 01488 }; 01489 class NFALazyQuantifierNode : public NFAQuantifierNode 01490 { 01491 public: 01492 NFALazyQuantifierNode(Pattern * pat, NFANode * internal, 01493 const int minMatch = Pattern::MIN_QMATCH, 01494 const int maxMatch = Pattern::MAX_QMATCH); 01495 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01496 }; 01497 class NFAPossessiveQuantifierNode : public NFAQuantifierNode 01498 { 01499 public: 01500 NFAPossessiveQuantifierNode(Pattern * pat, NFANode * internal, 01501 const int minMatch = Pattern::MIN_QMATCH, 01502 const int maxMatch = Pattern::MAX_QMATCH); 01503 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01504 }; 01505 class NFAAcceptNode : public NFANode 01506 { 01507 public: 01508 NFAAcceptNode(); 01509 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01510 }; 01511 class NFAClassNode : public NFANode 01512 { 01513 public: 01514 bool inv; 01515 std::map<char, bool> vals; 01516 NFAClassNode(const bool invert = 0); 01517 NFAClassNode(const std::string & clazz, const bool invert); 01518 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01519 }; 01520 class NFACIClassNode : public NFANode 01521 { 01522 public: 01523 bool inv; 01524 std::map<char, bool> vals; 01525 NFACIClassNode(const bool invert = 0); 01526 NFACIClassNode(const std::string & clazz, const bool invert); 01527 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01528 }; 01529 class NFASubStartNode : public NFANode 01530 { 01531 public: 01532 NFASubStartNode(); 01533 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01534 }; 01535 class NFAOrNode : public NFANode 01536 { 01537 public: 01538 NFANode * one; 01539 NFANode * two; 01540 NFAOrNode(NFANode * first, NFANode * second); 01541 virtual void findAllNodes(std::map<NFANode*, bool> & soFar); 01542 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01543 }; 01544 class NFAQuoteNode : public NFANode 01545 { 01546 public: 01547 std::string qStr; 01548 NFAQuoteNode(const std::string & quoted); 01549 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01550 }; 01551 class NFACIQuoteNode : public NFANode 01552 { 01553 public: 01554 std::string qStr; 01555 NFACIQuoteNode(const std::string & quoted); 01556 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01557 }; 01558 class NFALookAheadNode : public NFANode 01559 { 01560 public: 01561 bool pos; 01562 NFANode * inner; 01563 NFALookAheadNode(NFANode * internal, const bool positive); 01564 virtual void findAllNodes(std::map<NFANode*, bool> & soFar); 01565 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01566 }; 01567 class NFALookBehindNode : public NFANode 01568 { 01569 public: 01570 bool pos; 01571 std::string mStr; 01572 NFALookBehindNode(const std::string & str, const bool positive); 01573 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01574 }; 01575 class NFAStartOfLineNode : public NFANode 01576 { 01577 public: 01578 NFAStartOfLineNode(); 01579 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01580 }; 01581 class NFAEndOfLineNode : public NFANode 01582 { 01583 public: 01584 NFAEndOfLineNode(); 01585 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01586 }; 01587 class NFAReferenceNode : public NFANode 01588 { 01589 public: 01590 int gi; 01591 NFAReferenceNode(const int groupIndex); 01592 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01593 }; 01594 class NFAStartOfInputNode : public NFANode 01595 { 01596 public: 01597 NFAStartOfInputNode(); 01598 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01599 inline virtual bool isStartOfInputNode() const { return true; } 01600 }; 01601 class NFAEndOfInputNode : public NFANode 01602 { 01603 public: 01604 bool term; 01605 NFAEndOfInputNode(const bool lookForTerm); 01606 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01607 }; 01608 class NFAWordBoundaryNode : public NFANode 01609 { 01610 public: 01611 bool pos; 01612 NFAWordBoundaryNode(const bool positive); 01613 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01614 }; 01615 class NFAEndOfMatchNode : public NFANode 01616 { 01617 public: 01618 NFAEndOfMatchNode(); 01619 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01620 }; 01621 class NFAGroupHeadNode : public NFANode 01622 { 01623 public: 01624 int gi; 01625 NFAGroupHeadNode(const int groupIndex); 01626 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01627 inline virtual bool isGroupHeadNode() const { return true; } 01628 }; 01629 class NFAGroupTailNode : public NFANode 01630 { 01631 public: 01632 int gi; 01633 NFAGroupTailNode(const int groupIndex); 01634 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01635 }; 01636 class NFAGroupLoopPrologueNode : public NFANode 01637 { 01638 public: 01639 int gi; 01640 NFAGroupLoopPrologueNode(const int groupIndex); 01641 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01642 }; 01643 class NFAGroupLoopNode : public NFANode 01644 { 01645 public: 01646 int gi, min, max, type; 01647 NFANode * inner; 01648 NFAGroupLoopNode(NFANode * internal, const int minMatch, 01649 const int maxMatch, const int groupIndex, const int matchType); 01650 virtual void findAllNodes(std::map<NFANode*, bool> & soFar); 01651 virtual int match(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01652 int matchGreedy(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01653 int matchLazy(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01654 int matchPossessive(const std::string & str, Matcher * matcher, const int curInd = 0) const; 01655 }; 01656 01657 #endif 01658