#include "regexp.h" // TODO: make a little more multi-byte safe // regexp match functions // A match means the entire string TEXT is used up in matching. // In the pattern string: // `*' matches any sequence of characters (zero or more) // `?' matches any character // [SET] matches any character in the specified set, // [!SET] or [^SET] matches any character not in the specified set. // A set is composed of characters or ranges; a range looks like // character hyphen character (as in 0-9 or A-Z). [0-9a-zA-Z_] is the // minimal set of characters allowed in the [..] pattern construct. // Other characters are allowed (ie. 8 bit characters) if your system // will support them. // To suppress the special syntactic significance of any of `[]*?!^-\', // and match the character exactly, precede it with a `\'. enum { MATCH_VALID = 1, /* valid match */ MATCH_END, /* premature end of pattern string */ MATCH_ABORT, /* premature end of text string */ MATCH_RANGE, /* match failure on [..] construct */ MATCH_LITERAL, /* match failure on literal match */ MATCH_PATTERN, /* bad pattern */ }; enum { PATTERN_VALID = 0, /* valid pattern */ PATTERN_ESC = -1, /* literal escape at end of pattern */ PATTERN_RANGE = -2, /* malformed range in [..] construct */ PATTERN_CLOSE = -3, /* no end bracket in [..] construct */ PATTERN_EMPTY = -4, /* [..] contstruct is empty */ }; int Matche(const regchar_t *p, const regchar_t *t); // TODO: make this multi-byte aware int matche_after_star(const regchar_t *p, const regchar_t *t) { register int match = 0; register regchar_t nextp; /* pass over existing ? and * in pattern */ while ( *p == '?' || *p == '*' ) { /* take one char for each ? and + */ if (*p == '?') { /* if end of text then no match */ if (!*t++) return MATCH_ABORT; } /* move to next char in pattern */ p++; } /* if end of pattern we have matched regardless of text left */ if (!*p) return MATCH_VALID; /* get the next character to match which must be a literal or '[' */ nextp = *p; if (nextp == '\\') { nextp = p[1]; /* if end of text then we have a bad pattern */ if (!nextp) return MATCH_PATTERN; } /* Continue until we run out of text or definite result seen */ do { /* a precondition for matching is that the next character in the pattern match the next character in the text or that the next pattern char is the beginning of a range. Increment text pointer as we go here */ if (nextp == *t || nextp == '[') match = Matche(p, t); /* if the end of text is reached then no match */ if (!*t++) match = MATCH_ABORT; } while ( match != MATCH_VALID && match != MATCH_ABORT && match != MATCH_PATTERN); /* return result */ return match; } int Matche(const regchar_t *p, const regchar_t *t) { regchar_t range_start, range_end; /* start and end in range */ bool invert; /* is this [..] or [!..] */ bool member_match; /* have I matched the [..] construct? */ bool loop; /* should I terminate? */ for ( ; *p; p++, t++) { /* if this is the end of the text then this is the end of the match */ if (!*t) { return (*p == '*' && *++p == '\0') ? MATCH_VALID : MATCH_ABORT; } /* determine and react to pattern type */ switch (*p) { case '?': /* single any character match */ break; case '*': /* multiple any character match */ return matche_after_star (p, t); /* [..] construct, single member/exclusion character match */ case '[': { /* move to beginning of range */ p++; /* check if this is a member match or exclusion match */ invert = false; if (*p == '!' || *p == '^') { invert = true; p++; } /* if closing bracket here or at range start then we have a malformed pattern */ if (*p == ']') return MATCH_PATTERN; member_match = false; loop = true; while (loop) { /* if end of construct then loop is done */ if (*p == ']') { loop = false; continue; } /* matching a '!', '^', '-', '\' or a ']' */ if (*p == '\\') range_start = range_end = *++p; else range_start = range_end = *p; /* if end of pattern then bad pattern (Missing ']') */ if (!*p) return MATCH_PATTERN; /* check for range bar */ if (*++p == '-') { /* get the range end */ range_end = *++p; /* if end of pattern or construct then bad pattern */ if (range_end == '\0' || range_end == ']') return MATCH_PATTERN; /* special character range end */ if (range_end == '\\') { range_end = *++p; /* if end of text then we have a bad pattern */ if (!range_end) return MATCH_PATTERN; } /* move just beyond this range */ p++; } /* if the text character is in range then match found. make sure the range letters have the proper relationship to one another before comparison */ if (range_start < range_end) { if (*t >= range_start && *t <= range_end) { member_match = true; loop = false; } } else { if (*t >= range_end && *t <= range_start) { member_match = true; loop = false; } } } /* if there was a match in an exclusion set then no match */ /* if there was no match in a member set then no match */ if ((invert && member_match) || !(invert || member_match)) return MATCH_RANGE; /* if this is not an exclusion then skip the rest of the [...] construct that already matched. */ if (member_match) { while (p && *p != ']') { /* bad pattern (Missing ']') */ if (!*p) return MATCH_PATTERN; /* skip exact match */ if (*p == '\\') { p++; /* if end of text then we have a bad pattern */ if (!*p) return MATCH_PATTERN; } /* move to next pattern char */ p++; } } break; } case '\\': /* next character is quoted and must match exactly */ /* move pattern pointer to quoted char and fall through */ p++; /* if end of text then we have a bad pattern */ if (!*p) return MATCH_PATTERN; /* must match this character exactly */ default: if (*p != *t) return MATCH_LITERAL; } } /* if end of text not reached then the pattern fails */ if (*t) return MATCH_END; else return MATCH_VALID; } bool Match(const regchar_t *match, const regchar_t *string) { if (!match) return true; int error_type; error_type = Matche(match, string); return (error_type == MATCH_VALID); }