summaryrefslogtreecommitdiff
path: root/src/regex/tre.h
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2012-03-20 19:44:05 -0400
committer Rich Felker <dalias@aerifal.cx> 2012-03-20 19:44:05 -0400
commit ad47d45e9da8df364cb0a61b6146d51c196c8891 (patch)
tree 25b0dc14b0a56306c671dfdfd69b4a8b0f3cbcd8 /src/regex/tre.h
parent baa43bca0a051e8deb0d6a9a8882ceeea5c27249 (diff)
downloadmusl-ad47d45e9da8df364cb0a61b6146d51c196c8891.tar.gz
musl-ad47d45e9da8df364cb0a61b6146d51c196c8891.tar.bz2
musl-ad47d45e9da8df364cb0a61b6146d51c196c8891.tar.xz
musl-ad47d45e9da8df364cb0a61b6146d51c196c8891.zip
upgrade to latest upstream TRE regex code (0.8.0)
the main practical results of this change are:
1. the regex code is no longer subject to LGPL; it's now 2-clause BSD
2. most (all?) popular nonstandard regex extensions are supported

I hesitate to call this a "sync" since both the old and new code are heavily modified. in one sense, the old code was "more severely" modified, in that it was actively hostile to non-strictly-conforming expressions. on the other hand, the new code has eliminated the useless translation of the entire regex string to wchar_t prior to compiling, and now only converts multibyte character literals as needed.

in the future i may use this modified TRE as a basis for writing the long-planned new regex engine that will avoid multibyte-to-wide character conversion entirely by compiling multibyte bracket expressions specific to UTF-8.
Diffstat (limited to 'src/regex/tre.h')
-rw-r--r-- src/regex/tre.h 99
1 files changed, 31 insertions, 68 deletions
diff --git a/src/regex/tre.h b/src/regex/tre.h
index bfd171f4..d6e1c2a7 100644
--- a/src/regex/tre.h
+++ b/src/regex/tre.h
@@ -1,21 +1,31 @@
/*
tre-internal.h - TRE internal definitions
- Copyright (c) 2001-2006 Ville Laurikari <vl@iki.fi>.
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
@@ -23,12 +33,7 @@
#include <wchar.h>
#include <wctype.h>
-#define TRE_MULTIBYTE 1
#undef TRE_MBSTATE
-#define TRE_WCHAR 1
-#define TRE_USE_SYSTEM_WCTYPE 1
-#define HAVE_WCSTOMBS 1
-#define TRE_MB_CUR_MAX MB_CUR_MAX
#define NDEBUG
@@ -37,33 +42,16 @@ typedef int reg_errcode_t;
typedef wchar_t tre_char_t;
-
-#ifdef TRE_DEBUG
-#include <stdio.h>
-#define DPRINT(msg) do {printf msg; fflush(stdout);} while(0)
-#else /* !TRE_DEBUG */
#define DPRINT(msg) do { } while(0)
-#endif /* !TRE_DEBUG */
#define elementsof(x) ( sizeof(x) / sizeof(x[0]) )
-#if 1
-int __mbtowc(wchar_t *, const char *);
-#define tre_mbrtowc(pwc, s, n, ps) (__mbtowc((pwc), (s)))
-#else
#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
-#endif
/* Wide characters. */
typedef wint_t tre_cint_t;
#define TRE_CHAR_MAX WCHAR_MAX
-#ifdef TRE_MULTIBYTE
-#define TRE_MB_CUR_MAX MB_CUR_MAX
-#else /* !TRE_MULTIBYTE */
-#define TRE_MB_CUR_MAX 1
-#endif /* !TRE_MULTIBYTE */
-
#define tre_isalnum iswalnum
#define tre_isalpha iswalpha
#define tre_isblank iswblank
@@ -98,9 +86,6 @@ typedef wctype_t tre_ctype_t;
#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
-/* Define STRF to the correct printf formatter for strings. */
-#define STRF "ls"
-
/* TNFA transition type. A TNFA state is an array of transitions,
the terminator is a transition with NULL `state'. */
typedef struct tnfa_transition tre_tnfa_transition_t;
@@ -170,42 +155,21 @@ struct tnfa {
tre_tnfa_transition_t *initial;
tre_tnfa_transition_t *final;
tre_submatch_data_t *submatch_data;
+ char *firstpos_chars;
+ int first_char;
unsigned int num_submatches;
tre_tag_direction_t *tag_directions;
+ int *minimal_tags;
int num_tags;
+ int num_minimals;
int end_tag;
int num_states;
int cflags;
int have_backrefs;
+ int have_approx;
+ int params_depth;
};
-#if 0
-static int
-tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags);
-
-static void
-tre_free(regex_t *preg);
-
-static void
-tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
- const tre_tnfa_t *tnfa, int *tags, int match_eo);
-
-static reg_errcode_t
-tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
- tre_str_type_t type, int *match_tags, int eflags,
- int *match_end_ofs);
-
-static reg_errcode_t
-tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
- tre_str_type_t type, int *match_tags, int eflags,
- int *match_end_ofs);
-
-static reg_errcode_t
-tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
- int len, tre_str_type_t type, int *match_tags,
- int eflags, int *match_end_ofs);
-#endif
-
/* from tre-mem.h: */
#define TRE_MEM_BLOCK_SIZE 1024
@@ -266,4 +230,3 @@ void tre_mem_destroy(tre_mem_t mem);
#define xfree free
#define xrealloc realloc
-/* EOF */