summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Szabolcs Nagy <nsz@port70.net> 2015-04-18 16:47:17 +0000
committer: Rich Felker <dalias@aerifal.cx> 2016-01-30 20:53:17 -0500
commit: da4cc13b9705e7d3a02216959b9711b3b30828c1 (patch)
tree: 40b725270f05a2673495e6c72ce78acd85d68ada
parent: 7eaa76fc2e7993582989d3838b1ac32dd8abac09 (diff)
download: musl-da4cc13b9705e7d3a02216959b9711b3b30828c1.tar.gz
musl-da4cc13b9705e7d3a02216959b9711b3b30828c1.tar.bz2
musl-da4cc13b9705e7d3a02216959b9711b3b30828c1.tar.xz
musl-da4cc13b9705e7d3a02216959b9711b3b30828c1.zip
regex: treat \| in BRE as alternation
The standard does not define semantics for \| in BRE, but some code depends on it meaning alternation. An empty alternative expression is allowed, for consistency with ERE. Based on a patch by Rob Landley.
-rw-r--r-- src/regex/regcomp.c 19
1 files changed, 17 insertions, 2 deletions
diff --git a/src/regex/regcomp.c b/src/regex/regcomp.c
index 078f657c..f1f06afe 100644
--- a/src/regex/regcomp.c
+++ b/src/regex/regcomp.c
@@ -841,6 +841,14 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
/* reject repetitions after empty expression in BRE */
if (!ere)
return REG_BADRPT;
+ case '|':
+ /* extension: treat \| as alternation in BRE */
+ if (!ere) {
+ node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ s--;
+ goto end;
+ }
+ /* fallthrough */
default:
if (!ere && (unsigned)*s-'1' < 9) {
/* back reference */
@@ -918,6 +926,7 @@ parse_literal:
s += len;
break;
}
+end:
if (!node)
return REG_ESPACE;
ctx->n = node;
@@ -1016,13 +1025,20 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
if ((ere && *s == '|') ||
(ere && *s == ')' && depth) ||
(!ere && *s == '\\' && s[1] == ')') ||
+ /* extension: treat \| as alternation in BRE */
+ (!ere && *s == '\\' && s[1] == '|') ||
!*s) {
/* extension: empty branch is unspecified (), (|a), (a|)
here they are not rejected but match on empty string */
int c = *s;
nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
nbranch = 0;
- if (c != '|') {
+
+ if (c == '\\' && s[1] == '|') {
+ s+=2;
+ } else if (c == '|') {
+ s++;
+ } else {
if (c == '\\') {
if (!depth) return REG_EPAREN;
s+=2;
@@ -1042,7 +1058,6 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
nunion = tre_stack_pop_voidptr(stack);
goto parse_iter;
}
- s++;
}
}
}