[PATCH v2] MdeModulePkg/RegularExpressionDxe: Make oniguruma a submodule in edk2.


Zhang, Shenglei
 

Access oniguruma through a git submodule instead of carrying a copy of
its sources in the tree, and upgrade oniguruma from v6.9.3 to v6.9.4_mark1.
REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2073

Cc: Jian J Wang <jian.j.wang@...>
Cc: Hao A Wu <hao.a.wu@...>
Cc: Liming Gao <liming.gao@...>
Signed-off-by: Shenglei Zhang <shenglei.zhang@...>
---

v2: Include wrapper files in RegularExpressionDxe.inf.
Patch link: https://github.com/shenglei10/edk2/commits/oniguruma

.../Oniguruma/OnigurumaUefiPort.c | 26 -
.../RegularExpressionDxe/Oniguruma/ascii.c | 118 -
.../Oniguruma/onig_init.c | 45 -
.../RegularExpressionDxe/Oniguruma/regcomp.c | 6972 ----
.../RegularExpressionDxe/Oniguruma/regenc.c | 1029 -
.../RegularExpressionDxe/Oniguruma/regerror.c | 402 -
.../RegularExpressionDxe/Oniguruma/regexec.c | 5874 ---
.../RegularExpressionDxe/Oniguruma/reggnu.c | 131 -
.../RegularExpressionDxe/Oniguruma/regparse.c | 8461 -----
.../Oniguruma/regposerr.c | 108 -
.../RegularExpressionDxe/Oniguruma/regposix.c | 304 -
.../Oniguruma/regsyntax.c | 336 -
.../RegularExpressionDxe/Oniguruma/regtrav.c | 76 -
.../Oniguruma/regversion.c | 57 -
.../RegularExpressionDxe/Oniguruma/st.c | 588 -
.../RegularExpressionDxe/Oniguruma/unicode.c | 1152 -
.../Oniguruma/unicode_egcb_data.c | 1374 -
.../Oniguruma/unicode_fold1_key.c | 2995 --
.../Oniguruma/unicode_fold2_key.c | 222 -
.../Oniguruma/unicode_fold3_key.c | 133 -
.../Oniguruma/unicode_fold_data.c | 1522 -
.../Oniguruma/unicode_property_data.c | 30388 ----------------
.../Oniguruma/unicode_property_data_posix.c | 5347 ---
.../Oniguruma/unicode_unfold_key.c | 3299 --
.../Oniguruma/unicode_wb_data.c | 1023 -
.../RegularExpressionDxe/Oniguruma/utf16_le.c | 309 -
.../{Oniguruma => }/OnigurumaIntrinsics.c | 1 +
.../RegularExpressionDxe/OnigurumaUefiPort.c | 90 +
.gitmodules | 3 +
.../RegularExpressionDxe/Oniguruma/AUTHORS | 1 -
.../RegularExpressionDxe/Oniguruma/COPYING | 26 -
.../RegularExpressionDxe/Oniguruma/README | 195 -
.../RegularExpressionDxe/Oniguruma/oniggnu.h | 87 -
.../Oniguruma/onigposix.h | 172 -
.../Oniguruma/oniguruma.h | 1014 -
.../RegularExpressionDxe/Oniguruma/regenc.h | 279 -
.../RegularExpressionDxe/Oniguruma/regint.h | 1117 -
.../RegularExpressionDxe/Oniguruma/regparse.h | 455 -
.../RegularExpressionDxe/Oniguruma/st.h | 69 -
.../{Oniguruma => }/OnigurumaUefiPort.h | 44 +-
.../RegularExpressionDxe.h | 3 +-
.../RegularExpressionDxe.inf | 73 +-
.../Universal/RegularExpressionDxe/config.h | 9 +
.../Universal/RegularExpressionDxe/oniguruma | 1 +
.../Universal/RegularExpressionDxe/stdarg.h | 9 +
.../Universal/RegularExpressionDxe/stddef.h | 9 +
.../Universal/RegularExpressionDxe/stdio.h | 9 +
.../Universal/RegularExpressionDxe/stdlib.h | 9 +
.../Universal/RegularExpressionDxe/string.h | 9 +
49 files changed, 230 insertions(+), 75745 deletions(-)
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaUefiPort.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/ascii.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/onig_init.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regerror.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regexec.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/reggnu.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regposerr.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regposix.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regsyntax.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regtrav.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regversion.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/st.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/unicode.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/unicode_egcb_data.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/unicode_fold1_key.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/unicode_fold2_key.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/unicode_fold3_key.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/unicode_fold_data.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/unicode_property_data.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/unicode_property_data_posix.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/unicode_unfold_key.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/unicode_wb_data.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/utf16_le.c
rename MdeModulePkg/Universal/RegularExpressionDxe/{Oniguruma => }/OnigurumaIntrinsics.c (91%)
create mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/OnigurumaUefiPort.c
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/AUTHORS
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/COPYING
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/README
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/oniggnu.h
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/onigposix.h
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/oniguruma.h
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.h
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regint.h
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.h
delete mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/st.h
rename MdeModulePkg/Universal/RegularExpressionDxe/{Oniguruma => }/OnigurumaUefiPort.h (62%)
create mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/config.h
create mode 160000 MdeModulePkg/Universal/RegularExpressionDxe/oniguruma
create mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/stdarg.h
create mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/stddef.h
create mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/stdio.h
create mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/stdlib.h
create mode 100644 MdeModulePkg/Universal/RegularExpressionDxe/string.h

diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaUefiPort.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaUefiPort.c
deleted file mode 100644
index 376fcabd760c..000000000000
--- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/OnigurumaUefiPort.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/** @file
-
- Module to rewrite stdlib references within Oniguruma
-
- (C) Copyright 2014-2015 Hewlett Packard Enterprise Development LP<BR>
-
- SPDX-License-Identifier: BSD-2-Clause-Patent
-**/
-#include "OnigurumaUefiPort.h"
-
-int EFIAPI sprintf_s(char *str, size_t sizeOfBuffer, char const *fmt, ...)
-{
- VA_LIST Marker;
- int NumberOfPrinted;
-
- VA_START (Marker, fmt);
- NumberOfPrinted = (int)AsciiVSPrint (str, sizeOfBuffer, fmt, Marker);
- VA_END (Marker);
-
- return NumberOfPrinted;
-}
-
-int OnigStrCmp (char* Str1, char* Str2)
-{
- return (int)AsciiStrCmp (Str1, Str2);
-}
diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/ascii.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/ascii.c
deleted file mode 100644
index 2527c02b323a..000000000000
--- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/ascii.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/**********************************************************************
- ascii.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "regint.h" /* for USE_CALLOUT */
-
-static int
-init(void)
-{
-#ifdef USE_CALLOUT
-
- int id;
- OnigEncoding enc;
- char* name;
- unsigned int args[4];
- OnigValue opts[4];
-
- enc = ONIG_ENCODING_ASCII;
-
- name = "FAIL"; BC0_P(name, fail);
- name = "MISMATCH"; BC0_P(name, mismatch);
-
- name = "MAX";
- args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
- args[1] = ONIG_TYPE_CHAR;
- opts[0].c = 'X';
- BC_B_O(name, max, 2, args, 1, opts);
-
- name = "ERROR";
- args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT;
- BC_P_O(name, error, 1, args, 1, opts);
-
- name = "COUNT";
- args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
- BC_B_O(name, count, 1, args, 1, opts);
-
- name = "TOTAL_COUNT";
- args[0] = ONIG_TYPE_CHAR; opts[0].c = '>';
- BC_B_O(name, total_count, 1, args, 1, opts);
-
- name = "CMP";
- args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
- args[1] = ONIG_TYPE_STRING;
- args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
- BC_P(name, cmp, 3, args);
-
-#endif /* USE_CALLOUT */
-
- return ONIG_NORMAL;
-}
-
-#if 0
-static int
-is_initialized(void)
-{
- /* Don't use this function */
- /* can't answer, because builtin callout entries removed in onig_end() */
- return 0;
-}
-#endif
-
-static int
-ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype)
-{
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
- return FALSE;
-}
-
-OnigEncodingType OnigEncodingASCII = {
- onigenc_single_byte_mbc_enc_len,
- "US-ASCII", /* name */
- 1, /* max enc length */
- 1, /* min enc length */
- onigenc_is_mbc_newline_0x0a,
- onigenc_single_byte_mbc_to_code,
- onigenc_single_byte_code_to_mbclen,
- onigenc_single_byte_code_to_mbc,
- onigenc_ascii_mbc_case_fold,
- onigenc_ascii_apply_all_case_fold,
- onigenc_ascii_get_case_fold_codes_by_str,
- onigenc_minimum_property_name_to_ctype,
- ascii_is_code_ctype,
- onigenc_not_support_get_ctype_code_range,
- onigenc_single_byte_left_adjust_char_head,
- onigenc_always_true_is_allowed_reverse_match,
- init,
- 0, /* is_initialized */
- onigenc_always_true_is_valid_mbc_string,
- ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_SKIP_OFFSET_1,
- 0, 0
-};
diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/onig_init.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/onig_init.c
deleted file mode 100644
index 9fc6dca3c073..000000000000
--- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/onig_init.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/**********************************************************************
- onig_init.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2016-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "regint.h"
-
-/* onig_init(): deprecated function */
-extern int
-onig_init(void)
-{
-#if 0
- OnigEncoding encs[] = {
- ONIG_ENCODING_UTF8
- };
-
- return onig_initialize(encs, sizeof(encs)/sizeof(encs[0]));
-#else
- return onig_initialize(0, 0);
-#endif
-}
diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c
deleted file mode 100644
index d847ca4233e2..000000000000
--- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regcomp.c
+++ /dev/null
@@ -1,6972 +0,0 @@
-/**********************************************************************
- regcomp.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "regparse.h"
-
-#define OPS_INIT_SIZE 8
-
-OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
-
-#if 0
-typedef struct {
- int n;
- int alloc;
- int* v;
-} int_stack;
-
-static int
-make_int_stack(int_stack** rs, int init_size)
-{
- int_stack* s;
- int* v;
-
- *rs = 0;
-
- s = xmalloc(sizeof(*s));
- if (IS_NULL(s)) return ONIGERR_MEMORY;
-
- v = (int* )xmalloc(sizeof(int) * init_size);
- if (IS_NULL(v)) {
- xfree(s);
- return ONIGERR_MEMORY;
- }
-
- s->n = 0;
- s->alloc = init_size;
- s->v = v;
-
- *rs = s;
- return ONIG_NORMAL;
-}
-
-static void
-free_int_stack(int_stack* s)
-{
- if (IS_NOT_NULL(s)) {
- if (IS_NOT_NULL(s->v))
- xfree(s->v);
- xfree(s);
- }
-}
-
-static int
-int_stack_push(int_stack* s, int v)
-{
- if (s->n >= s->alloc) {
- int new_size = s->alloc * 2;
- int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size, sizeof(int) * s->alloc);
- if (IS_NULL(nv)) return ONIGERR_MEMORY;
-
- s->alloc = new_size;
- s->v = nv;
- }
-
- s->v[s->n] = v;
- s->n++;
- return ONIG_NORMAL;
-}
-
-static int
-int_stack_pop(int_stack* s)
-{
- int v;
-
-#ifdef ONIG_DEBUG
- if (s->n <= 0) {
- fprintf(stderr, "int_stack_pop: fail empty. %p\n", s);
- return 0;
- }
-#endif
-
- v = s->v[s->n];
- s->n--;
- return v;
-}
-#endif
-
-static int
-ops_init(regex_t* reg, int init_alloc_size)
-{
- Operation* p;
- size_t size;
-
- if (init_alloc_size > 0) {
- size = sizeof(Operation) * init_alloc_size;
- p = (Operation* )xmalloc(size);
- CHECK_NULL_RETURN_MEMERR(p);
-#ifdef USE_DIRECT_THREADED_CODE
- {
- enum OpCode* cp;
- size = sizeof(enum OpCode) * init_alloc_size;
- cp = (enum OpCode* )xmalloc(size);
- CHECK_NULL_RETURN_MEMERR(cp);
- reg->ocs = cp;
- }
-#endif
- }
- else {
- p = (Operation* )0;
-#ifdef USE_DIRECT_THREADED_CODE
- reg->ocs = (enum OpCode* )0;
-#endif
- }
-
- reg->ops = p;
- reg->ops_curr = 0; /* !!! not yet done ops_new() */
- reg->ops_alloc = init_alloc_size;
- reg->ops_used = 0;
-
- return ONIG_NORMAL;
-}
-
-static int
-ops_expand(regex_t* reg, int n)
-{
-#define MIN_OPS_EXPAND_SIZE 4
-
-#ifdef USE_DIRECT_THREADED_CODE
- enum OpCode* cp;
-#endif
- Operation* p;
- size_t size;
-
- if (n <= 0) n = MIN_OPS_EXPAND_SIZE;
-
- n += reg->ops_alloc;
-
- size = sizeof(Operation) * n;
- p = (Operation* )xrealloc(reg->ops, size, sizeof(Operation) * reg->ops_alloc);
- CHECK_NULL_RETURN_MEMERR(p);
-
-#ifdef USE_DIRECT_THREADED_CODE
- size = sizeof(enum OpCode) * n;
- cp = (enum OpCode* )xrealloc(reg->ocs, size, sizeof(enum OpCode) * reg->ops_alloc);
- CHECK_NULL_RETURN_MEMERR(cp);
- reg->ocs = cp;
-#endif
-
- reg->ops = p;
- reg->ops_alloc = n;
- if (reg->ops_used == 0)
- reg->ops_curr = 0;
- else
- reg->ops_curr = reg->ops + (reg->ops_used - 1);
-
- return ONIG_NORMAL;
-}
-
-static int
-ops_new(regex_t* reg)
-{
- int r;
-
- if (reg->ops_used >= reg->ops_alloc) {
- r = ops_expand(reg, reg->ops_alloc);
- if (r != ONIG_NORMAL) return r;
- }
-
- reg->ops_curr = reg->ops + reg->ops_used;
- reg->ops_used++;
-
- xmemset(reg->ops_curr, 0, sizeof(Operation));
- return ONIG_NORMAL;
-}
-
-static int
-is_in_string_pool(regex_t* reg, UChar* s)
-{
- return (s >= reg->string_pool && s < reg->string_pool_end);
-}
-
-static void
-ops_free(regex_t* reg)
-{
- int i;
-
- if (IS_NULL(reg->ops)) return ;
-
- for (i = 0; i < (int )reg->ops_used; i++) {
- enum OpCode opcode;
- Operation* op;
-
- op = reg->ops + i;
-
-#ifdef USE_DIRECT_THREADED_CODE
- opcode = *(reg->ocs + i);
-#else
- opcode = op->opcode;
-#endif
-
- switch (opcode) {
- case OP_EXACTMBN:
- if (! is_in_string_pool(reg, op->exact_len_n.s))
- xfree(op->exact_len_n.s);
- break;
- case OP_EXACTN: case OP_EXACTMB2N: case OP_EXACTMB3N: case OP_EXACTN_IC:
- if (! is_in_string_pool(reg, op->exact_n.s))
- xfree(op->exact_n.s);
- break;
- case OP_EXACT1: case OP_EXACT2: case OP_EXACT3: case OP_EXACT4:
- case OP_EXACT5: case OP_EXACTMB2N1: case OP_EXACTMB2N2:
- case OP_EXACTMB2N3: case OP_EXACT1_IC:
- break;
-
- case OP_CCLASS_NOT: case OP_CCLASS:
- xfree(op->cclass.bsp);
- break;
-
- case OP_CCLASS_MB_NOT: case OP_CCLASS_MB:
- xfree(op->cclass_mb.mb);
- break;
- case OP_CCLASS_MIX_NOT: case OP_CCLASS_MIX:
- xfree(op->cclass_mix.mb);
- xfree(op->cclass_mix.bsp);
- break;
-
- case OP_BACKREF1: case OP_BACKREF2: case OP_BACKREF_N: case OP_BACKREF_N_IC:
- break;
- case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC:
- case OP_BACKREF_WITH_LEVEL:
- case OP_BACKREF_WITH_LEVEL_IC:
- case OP_BACKREF_CHECK:
- case OP_BACKREF_CHECK_WITH_LEVEL:
- if (op->backref_general.num != 1)
- xfree(op->backref_general.ns);
- break;
-
- default:
- break;
- }
- }
-
- xfree(reg->ops);
-#ifdef USE_DIRECT_THREADED_CODE
- xfree(reg->ocs);
- reg->ocs = 0;
-#endif
-
- reg->ops = 0;
- reg->ops_curr = 0;
- reg->ops_alloc = 0;
- reg->ops_used = 0;
-}
-
-static int
-ops_calc_size_of_string_pool(regex_t* reg)
-{
- int i;
- int total;
-
- if (IS_NULL(reg->ops)) return 0;
-
- total = 0;
- for (i = 0; i < (int )reg->ops_used; i++) {
- enum OpCode opcode;
- Operation* op;
-
- op = reg->ops + i;
-#ifdef USE_DIRECT_THREADED_CODE
- opcode = *(reg->ocs + i);
-#else
- opcode = op->opcode;
-#endif
-
- switch (opcode) {
- case OP_EXACTMBN:
- total += op->exact_len_n.len * op->exact_len_n.n;
- break;
- case OP_EXACTN:
- case OP_EXACTN_IC:
- total += op->exact_n.n;
- break;
- case OP_EXACTMB2N:
- total += op->exact_n.n * 2;
- break;
- case OP_EXACTMB3N:
- total += op->exact_n.n * 3;
- break;
-
- default:
- break;
- }
- }
-
- return total;
-}
-
-static int
-ops_make_string_pool(regex_t* reg)
-{
- int i;
- int len;
- int size;
- UChar* pool;
- UChar* curr;
-
- size = ops_calc_size_of_string_pool(reg);
- if (size <= 0) {
- return 0;
- }
-
- curr = pool = (UChar* )xmalloc((size_t )size);
- CHECK_NULL_RETURN_MEMERR(pool);
-
- for (i = 0; i < (int )reg->ops_used; i++) {
- enum OpCode opcode;
- Operation* op;
-
- op = reg->ops + i;
-#ifdef USE_DIRECT_THREADED_CODE
- opcode = *(reg->ocs + i);
-#else
- opcode = op->opcode;
-#endif
-
- switch (opcode) {
- case OP_EXACTMBN:
- len = op->exact_len_n.len * op->exact_len_n.n;
- xmemcpy(curr, op->exact_len_n.s, len);
- xfree(op->exact_len_n.s);
- op->exact_len_n.s = curr;
- curr += len;
- break;
- case OP_EXACTN:
- case OP_EXACTN_IC:
- len = op->exact_n.n;
- copy:
- xmemcpy(curr, op->exact_n.s, len);
- xfree(op->exact_n.s);
- op->exact_n.s = curr;
- curr += len;
- break;
- case OP_EXACTMB2N:
- len = op->exact_n.n * 2;
- goto copy;
- break;
- case OP_EXACTMB3N:
- len = op->exact_n.n * 3;
- goto copy;
- break;
-
- default:
- break;
- }
- }
-
- reg->string_pool = pool;
- reg->string_pool_end = pool + size;
- return 0;
-}
-
-extern OnigCaseFoldType
-onig_get_default_case_fold_flag(void)
-{
- return OnigDefaultCaseFoldFlag;
-}
-
-extern int
-onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
-{
- OnigDefaultCaseFoldFlag = case_fold_flag;
- return 0;
-}
-
-static int
-int_multiply_cmp(int x, int y, int v)
-{
- if (x == 0 || y == 0) return -1;
-
- if (x < INT_MAX / y) {
- int xy = x * y;
- if (xy > v) return 1;
- else {
- if (xy == v) return 0;
- else return -1;
- }
- }
- else
- return 1;
-}
-
-extern int
-onig_positive_int_multiply(int x, int y)
-{
- if (x == 0 || y == 0) return 0;
-
- if (x < INT_MAX / y)
- return x * y;
- else
- return -1;
-}
-
-
-static void
-swap_node(Node* a, Node* b)
-{
- Node c;
-
- c = *a; *a = *b; *b = c;
-
- if (NODE_TYPE(a) == NODE_STRING) {
- StrNode* sn = STR_(a);
- if (sn->capacity == 0) {
- int len = (int )(sn->end - sn->s);
- sn->s = sn->buf;
- sn->end = sn->s + len;
- }
- }
-
- if (NODE_TYPE(b) == NODE_STRING) {
- StrNode* sn = STR_(b);
- if (sn->capacity == 0) {
- int len = (int )(sn->end - sn->s);
- sn->s = sn->buf;
- sn->end = sn->s + len;
- }
- }
-}
-
-static OnigLen
-distance_add(OnigLen d1, OnigLen d2)
-{
- if (d1 == INFINITE_LEN || d2 == INFINITE_LEN)
- return INFINITE_LEN;
- else {
- if (d1 <= INFINITE_LEN - d2) return d1 + d2;
- else return INFINITE_LEN;
- }
-}
-
-static OnigLen
-distance_multiply(OnigLen d, int m)
-{
- if (m == 0) return 0;
-
- if (d < INFINITE_LEN / m)
- return d * m;
- else
- return INFINITE_LEN;
-}
-
-static int
-bitset_is_empty(BitSetRef bs)
-{
- int i;
-
- for (i = 0; i < (int )BITSET_SIZE; i++) {
- if (bs[i] != 0) return 0;
- }
- return 1;
-}
-
-#ifdef USE_CALL
-
-static int
-unset_addr_list_init(UnsetAddrList* list, int size)
-{
- UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
- CHECK_NULL_RETURN_MEMERR(p);
-
- list->num = 0;
- list->alloc = size;
- list->us = p;
- return 0;
-}
-
-static void
-unset_addr_list_end(UnsetAddrList* list)
-{
- if (IS_NOT_NULL(list->us))
- xfree(list->us);
-}
-
-static int
-unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)
-{
- UnsetAddr* p;
- int size;
-
- if (list->num >= list->alloc) {
- size = list->alloc * 2;
- p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size, sizeof(UnsetAddr)* list->alloc);
- CHECK_NULL_RETURN_MEMERR(p);
- list->alloc = size;
- list->us = p;
- }
-
- list->us[list->num].offset = offset;
- list->us[list->num].target = node;
- list->num++;
- return 0;
-}
-#endif /* USE_CALL */
-
-
-static int
-add_op(regex_t* reg, int opcode)
-{
- int r;
-
- r = ops_new(reg);
- if (r != ONIG_NORMAL) return r;
-
-#ifdef USE_DIRECT_THREADED_CODE
- *(reg->ocs + (reg->ops_curr - reg->ops)) = opcode;
-#else
- reg->ops_curr->opcode = opcode;
-#endif
-
- return 0;
-}
-
-static int compile_length_tree(Node* node, regex_t* reg);
-static int compile_tree(Node* node, regex_t* reg, ScanEnv* env);
-
-
-#define IS_NEED_STR_LEN_OP_EXACT(op) \
- ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
- (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
-
-static int
-select_str_opcode(int mb_len, int str_len, int ignore_case)
-{
- int op;
-
- if (ignore_case) {
- switch (str_len) {
- case 1: op = OP_EXACT1_IC; break;
- default: op = OP_EXACTN_IC; break;
- }
- }
- else {
- switch (mb_len) {
- case 1:
- switch (str_len) {
- case 1: op = OP_EXACT1; break;
- case 2: op = OP_EXACT2; break;
- case 3: op = OP_EXACT3; break;
- case 4: op = OP_EXACT4; break;
- case 5: op = OP_EXACT5; break;
- default: op = OP_EXACTN; break;
- }
- break;
-
- case 2:
- switch (str_len) {
- case 1: op = OP_EXACTMB2N1; break;
- case 2: op = OP_EXACTMB2N2; break;
- case 3: op = OP_EXACTMB2N3; break;
- default: op = OP_EXACTMB2N; break;
- }
- break;
-
- case 3:
- op = OP_EXACTMB3N;
- break;
-
- default:
- op = OP_EXACTMBN;
- break;
- }
- }
- return op;
-}
-
-static int
-is_strict_real_node(Node* node)
-{
- switch (NODE_TYPE(node)) {
- case NODE_STRING:
- {
- StrNode* sn = STR_(node);
- return (sn->end != sn->s);
- }
- break;
-
- case NODE_CCLASS:
- case NODE_CTYPE:
- return 1;
- break;
-
- default:
- return 0;
- break;
- }
-}
-
-static int
-compile_tree_empty_check(Node* node, regex_t* reg, int emptiness, ScanEnv* env)
-{
- int r;
- int saved_num_null_check = reg->num_null_check;
-
- if (emptiness != BODY_IS_NOT_EMPTY) {
- r = add_op(reg, OP_EMPTY_CHECK_START);
- if (r != 0) return r;
- COP(reg)->empty_check_start.mem = reg->num_null_check; /* NULL CHECK ID */
- reg->num_null_check++;
- }
-
- r = compile_tree(node, reg, env);
- if (r != 0) return r;
-
- if (emptiness != BODY_IS_NOT_EMPTY) {
- if (emptiness == BODY_IS_EMPTY_POSSIBILITY)
- r = add_op(reg, OP_EMPTY_CHECK_END);
- else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_MEM)
- r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);
- else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_REC)
- r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
-
- if (r != 0) return r;
- COP(reg)->empty_check_end.mem = saved_num_null_check; /* NULL CHECK ID */
- }
- return r;
-}
-
-#ifdef USE_CALL
-static int
-compile_call(CallNode* node, regex_t* reg, ScanEnv* env)
-{
- int r;
- int offset;
-
- r = add_op(reg, OP_CALL);
- if (r != 0) return r;
-
- COP(reg)->call.addr = 0; /* dummy addr. */
-
- offset = COP_CURR_OFFSET_BYTES(reg, call.addr);
- r = unset_addr_list_add(env->unset_addr_list, offset, NODE_CALL_BODY(node));
- return r;
-}
-#endif
-
-static int
-compile_tree_n_times(Node* node, int n, regex_t* reg, ScanEnv* env)
-{
- int i, r;
-
- for (i = 0; i < n; i++) {
- r = compile_tree(node, reg, env);
- if (r != 0) return r;
- }
- return 0;
-}
-
-static int
-add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
- regex_t* reg ARG_UNUSED, int ignore_case)
-{
- return 1;
-}
-
-static int
-add_compile_string(UChar* s, int mb_len, int str_len,
- regex_t* reg, int ignore_case)
-{
- int op;
- int r;
- int byte_len;
- UChar* p;
- UChar* end;
-
- op = select_str_opcode(mb_len, str_len, ignore_case);
- r = add_op(reg, op);
- if (r != 0) return r;
-
- byte_len = mb_len * str_len;
- end = s + byte_len;
-
- if (op == OP_EXACTMBN) {
- p = onigenc_strdup(reg->enc, s, end);
- CHECK_NULL_RETURN_MEMERR(p);
-
- COP(reg)->exact_len_n.len = mb_len;
- COP(reg)->exact_len_n.n = str_len;
- COP(reg)->exact_len_n.s = p;
- }
- else if (IS_NEED_STR_LEN_OP_EXACT(op)) {
- p = onigenc_strdup(reg->enc, s, end);
- CHECK_NULL_RETURN_MEMERR(p);
-
- if (op == OP_EXACTN_IC)
- COP(reg)->exact_n.n = byte_len;
- else
- COP(reg)->exact_n.n = str_len;
-
- COP(reg)->exact_n.s = p;
- }
- else {
- xmemcpy(COP(reg)->exact.s, s, (size_t )byte_len);
- COP(reg)->exact.s[byte_len] = '\0';
- }
-
- return 0;
-}
-
-static int
-compile_length_string_node(Node* node, regex_t* reg)
-{
- int rlen, r, len, prev_len, slen, ambig;
- UChar *p, *prev;
- StrNode* sn;
- OnigEncoding enc = reg->enc;
-
- sn = STR_(node);
- if (sn->end <= sn->s)
- return 0;
-
- ambig = NODE_STRING_IS_AMBIG(node);
-
- p = prev = sn->s;
- prev_len = enclen(enc, p);
- p += prev_len;
- slen = 1;
- rlen = 0;
-
- for (; p < sn->end; ) {
- len = enclen(enc, p);
- if (len == prev_len) {
- slen++;
- }
- else {
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
- rlen += r;
- prev = p;
- slen = 1;
- prev_len = len;
- }
- p += len;
- }
-
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
- rlen += r;
- return rlen;
-}
-
-static int
-compile_length_string_raw_node(StrNode* sn, regex_t* reg)
-{
- if (sn->end <= sn->s)
- return 0;
-
- return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),
- reg, 0);
-}
-
-static int
-compile_string_node(Node* node, regex_t* reg)
-{
- int r, len, prev_len, slen, ambig;
- UChar *p, *prev, *end;
- StrNode* sn;
- OnigEncoding enc = reg->enc;
-
- sn = STR_(node);
- if (sn->end <= sn->s)
- return 0;
-
- end = sn->end;
- ambig = NODE_STRING_IS_AMBIG(node);
-
- p = prev = sn->s;
- prev_len = enclen(enc, p);
- p += prev_len;
- slen = 1;
-
- for (; p < end; ) {
- len = enclen(enc, p);
- if (len == prev_len) {
- slen++;
- }
- else {
- r = add_compile_string(prev, prev_len, slen, reg, ambig);
- if (r != 0) return r;
-
- prev = p;
- slen = 1;
- prev_len = len;
- }
-
- p += len;
- }
-
- return add_compile_string(prev, prev_len, slen, reg, ambig);
-}
-
-static int
-compile_string_raw_node(StrNode* sn, regex_t* reg)
-{
- if (sn->end <= sn->s)
- return 0;
-
- return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0);
-}
-
-static void*
-set_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
-{
- size_t len;
- void* p;
-
- len = (size_t )mbuf->used;
- p = xmalloc(len);
- if (IS_NULL(p)) return NULL;
-
- xmemcpy(p, mbuf->p, len);
- return p;
-}
-
-static int
-compile_length_cclass_node(CClassNode* cc, regex_t* reg)
-{
- return 1;
-}
-
-static int
-compile_cclass_node(CClassNode* cc, regex_t* reg)
-{
- int r;
-
- if (IS_NULL(cc->mbuf)) {
- r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_NOT : OP_CCLASS);
- if (r != 0) return r;
-
- COP(reg)->cclass.bsp = xmalloc(SIZE_BITSET);
- CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass.bsp);
- xmemcpy(COP(reg)->cclass.bsp, cc->bs, SIZE_BITSET);
- }
- else {
- void* p;
-
- if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
- r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MB_NOT : OP_CCLASS_MB);
- if (r != 0) return r;
-
- p = set_multi_byte_cclass(cc->mbuf, reg);
- CHECK_NULL_RETURN_MEMERR(p);
- COP(reg)->cclass_mb.mb = p;
- }
- else {
- r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MIX_NOT : OP_CCLASS_MIX);
- if (r != 0) return r;
-
- COP(reg)->cclass_mix.bsp = xmalloc(SIZE_BITSET);
- CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass_mix.bsp);
- xmemcpy(COP(reg)->cclass_mix.bsp, cc->bs, SIZE_BITSET);
-
- p = set_multi_byte_cclass(cc->mbuf, reg);
- CHECK_NULL_RETURN_MEMERR(p);
- COP(reg)->cclass_mix.mb = p;
- }
- }
-
- return 0;
-}
-
-static int
-entry_repeat_range(regex_t* reg, int id, int lower, int upper)
-{
-#define REPEAT_RANGE_ALLOC 4
-
- OnigRepeatRange* p;
-
- if (reg->repeat_range_alloc == 0) {
- p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
- CHECK_NULL_RETURN_MEMERR(p);
- reg->repeat_range = p;
- reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
- }
- else if (reg->repeat_range_alloc <= id) {
- int n;
- n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
- p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
- sizeof(OnigRepeatRange) * n,
- sizeof(OnigRepeatRange) * reg->repeat_range_alloc);
- CHECK_NULL_RETURN_MEMERR(p);
- reg->repeat_range = p;
- reg->repeat_range_alloc = n;
- }
- else {
- p = reg->repeat_range;
- }
-
- p[id].lower = lower;
- p[id].upper = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper);
- return 0;
-}
-
-static int
-compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness,
- regex_t* reg, ScanEnv* env)
-{
- int r;
- int num_repeat = reg->num_repeat++;
-
- r = add_op(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
- if (r != 0) return r;
-
- COP(reg)->repeat.id = num_repeat;
- COP(reg)->repeat.addr = SIZE_INC_OP + target_len + SIZE_OP_REPEAT_INC;
-
- r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
- if (r != 0) return r;
-
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
- if (r != 0) return r;
-
- if (
-#ifdef USE_CALL
- NODE_IS_IN_MULTI_ENTRY(qn) ||
-#endif
- NODE_IS_IN_REAL_REPEAT(qn)) {
- r = add_op(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
- }
- else {
- r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
- }
- if (r != 0) return r;
-
- COP(reg)->repeat_inc.id = num_repeat;
- return r;
-}
-
-static int
-is_anychar_infinite_greedy(QuantNode* qn)
-{
- if (qn->greedy && IS_INFINITE_REPEAT(qn->upper) &&
- NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))
- return 1;
- else
- return 0;
-}
-
-#define QUANTIFIER_EXPAND_LIMIT_SIZE 10
-#define CKN_ON (ckn > 0)
-
-static int
-compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
-{
- int len, mod_tlen;
- int infinite = IS_INFINITE_REPEAT(qn->upper);
- enum BodyEmptyType emptiness = qn->emptiness;
- int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
-
- if (tlen < 0) return tlen;
- if (tlen == 0) return 0;
-
- /* anychar repeat */
- if (is_anychar_infinite_greedy(qn)) {
- if (qn->lower <= 1 ||
- int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {
- if (IS_NOT_NULL(qn->next_head_exact))
- return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
- else
- return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
- }
- }
-
- mod_tlen = tlen;
- if (emptiness != BODY_IS_NOT_EMPTY)
- mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END;
-
- if (infinite &&
- (qn->lower <= 1 ||
- int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
- if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
- len = SIZE_OP_JUMP;
- }
- else {
- len = tlen * qn->lower;
- }
-
- if (qn->greedy) {
-#ifdef USE_OP_PUSH_OR_JUMP_EXACT
- if (IS_NOT_NULL(qn->head_exact))
- len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
- else
-#endif
- if (IS_NOT_NULL(qn->next_head_exact))
- len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
- else
- len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
- }
- else
- len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
- }
- else if (qn->upper == 0) {
- if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
- len = SIZE_OP_JUMP + tlen;
- }
- else
- len = 0;
- }
- else if (!infinite && qn->greedy &&
- (qn->upper == 1 ||
- int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
- QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
- len = tlen * qn->lower;
- len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
- }
- else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
- len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
- }
- else {
- len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OP_REPEAT;
- }
-
- return len;
-}
-
-static int
-compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
-{
- int i, r, mod_tlen;
- int infinite = IS_INFINITE_REPEAT(qn->upper);
- enum BodyEmptyType emptiness = qn->emptiness;
- int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
-
- if (tlen < 0) return tlen;
- if (tlen == 0) return 0;
-
- if (is_anychar_infinite_greedy(qn) &&
- (qn->lower <= 1 ||
- int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
- r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
- if (r != 0) return r;
- if (IS_NOT_NULL(qn->next_head_exact)) {
- r = add_op(reg,
- IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)) ?
- OP_ANYCHAR_ML_STAR_PEEK_NEXT : OP_ANYCHAR_STAR_PEEK_NEXT);
- if (r != 0) return r;
-
- COP(reg)->anychar_star_peek_next.c = STR_(qn->next_head_exact)->s[0];
- return 0;
- }
- else {
- r = add_op(reg,
- IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)) ?
- OP_ANYCHAR_ML_STAR : OP_ANYCHAR_STAR);
- return r;
- }
- }
-
- mod_tlen = tlen;
- if (emptiness != BODY_IS_NOT_EMPTY)
- mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END;
-
- if (infinite &&
- (qn->lower <= 1 ||
- int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
- int addr;
-
- if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
- r = add_op(reg, OP_JUMP);
- if (r != 0) return r;
- if (qn->greedy) {
-#ifdef USE_OP_PUSH_OR_JUMP_EXACT
- if (IS_NOT_NULL(qn->head_exact))
- COP(reg)->jump.addr = SIZE_OP_PUSH_OR_JUMP_EXACT1 + SIZE_INC_OP;
- else
-#endif
- if (IS_NOT_NULL(qn->next_head_exact))
- COP(reg)->jump.addr = SIZE_OP_PUSH_IF_PEEK_NEXT + SIZE_INC_OP;
- else
- COP(reg)->jump.addr = SIZE_OP_PUSH + SIZE_INC_OP;
- }
- else {
- COP(reg)->jump.addr = SIZE_OP_JUMP + SIZE_INC_OP;
- }
- }
- else {
- r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
- if (r != 0) return r;
- }
-
- if (qn->greedy) {
-#ifdef USE_OP_PUSH_OR_JUMP_EXACT
- if (IS_NOT_NULL(qn->head_exact)) {
- r = add_op(reg, OP_PUSH_OR_JUMP_EXACT1);
- if (r != 0) return r;
- COP(reg)->push_or_jump_exact1.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
- COP(reg)->push_or_jump_exact1.c = STR_(qn->head_exact)->s[0];
-
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
- if (r != 0) return r;
-
- addr = -(mod_tlen + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1);
- }
- else
-#endif
- if (IS_NOT_NULL(qn->next_head_exact)) {
- r = add_op(reg, OP_PUSH_IF_PEEK_NEXT);
- if (r != 0) return r;
- COP(reg)->push_if_peek_next.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
- COP(reg)->push_if_peek_next.c = STR_(qn->next_head_exact)->s[0];
-
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
- if (r != 0) return r;
-
- addr = -(mod_tlen + (int )SIZE_OP_PUSH_IF_PEEK_NEXT);
- }
- else {
- r = add_op(reg, OP_PUSH);
- if (r != 0) return r;
- COP(reg)->push.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
-
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
- if (r != 0) return r;
-
- addr = -(mod_tlen + (int )SIZE_OP_PUSH);
- }
-
- r = add_op(reg, OP_JUMP);
- if (r != 0) return r;
- COP(reg)->jump.addr = addr;
- }
- else {
- r = add_op(reg, OP_JUMP);
- if (r != 0) return r;
- COP(reg)->jump.addr = mod_tlen + SIZE_INC_OP;
-
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
- if (r != 0) return r;
-
- r = add_op(reg, OP_PUSH);
- if (r != 0) return r;
- COP(reg)->push.addr = -mod_tlen;
- }
- }
- else if (qn->upper == 0) {
- if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
- r = add_op(reg, OP_JUMP);
- if (r != 0) return r;
- COP(reg)->jump.addr = tlen + SIZE_INC_OP;
-
- r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
- }
- else {
- /* Nothing output */
- r = 0;
- }
- }
- else if (! infinite && qn->greedy &&
- (qn->upper == 1 ||
- int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
- QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
- int n = qn->upper - qn->lower;
-
- r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
- if (r != 0) return r;
-
- for (i = 0; i < n; i++) {
- int v = onig_positive_int_multiply(n - i, tlen + SIZE_OP_PUSH);
- if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
-
- r = add_op(reg, OP_PUSH);
- if (r != 0) return r;
- COP(reg)->push.addr = v;
-
- r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
- if (r != 0) return r;
- }
- }
- else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
- r = add_op(reg, OP_PUSH);
- if (r != 0) return r;
- COP(reg)->push.addr = SIZE_INC_OP + SIZE_OP_JUMP;
-
- r = add_op(reg, OP_JUMP);
- if (r != 0) return r;
- COP(reg)->jump.addr = tlen + SIZE_INC_OP;
-
- r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
- }
- else {
- r = compile_range_repeat_node(qn, mod_tlen, emptiness, reg, env);
- }
- return r;
-}
-
-static int
-compile_length_option_node(BagNode* node, regex_t* reg)
-{
- int tlen;
- OnigOptionType prev = reg->options;
-
- reg->options = node->o.options;
- tlen = compile_length_tree(NODE_BAG_BODY(node), reg);
- reg->options = prev;
-
- return tlen;
-}
-
-static int
-compile_option_node(BagNode* node, regex_t* reg, ScanEnv* env)
-{
- int r;
- OnigOptionType prev = reg->options;
-
- reg->options = node->o.options;
- r = compile_tree(NODE_BAG_BODY(node), reg, env);
- reg->options = prev;
-
- return r;
-}
-
-static int
-compile_length_bag_node(BagNode* node, regex_t* reg)
-{
- int len;
- int tlen;
-
- if (node->type == BAG_OPTION)
- return compile_length_option_node(node, reg);
-
- if (NODE_BAG_BODY(node)) {
- tlen = compile_length_tree(NODE_BAG_BODY(node), reg);
- if (tlen < 0) return tlen;
- }
- else
- tlen = 0;
-
- switch (node->type) {
- case BAG_MEMORY:
-#ifdef USE_CALL
-
- if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
- len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
- return len;
- }
-
- if (NODE_IS_CALLED(node)) {
- len = SIZE_OP_MEMORY_START_PUSH + tlen
- + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
- len += (NODE_IS_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
- else
- len += (NODE_IS_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
- }
- else if (NODE_IS_RECURSION(node)) {
- len = SIZE_OP_MEMORY_START_PUSH;
- len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
- }
- else
-#endif
- {
- if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
- len = SIZE_OP_MEMORY_START_PUSH;
- else
- len = SIZE_OP_MEMORY_START;
-
- len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
- ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
- }
- break;
-
- case BAG_STOP_BACKTRACK:
- if (NODE_IS_STRICT_REAL_REPEAT(node)) {
- int v;
- QuantNode* qn;
-
- qn = QUANT_(NODE_BAG_BODY(node));
- tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
- if (tlen < 0) return tlen;
-
- v = onig_positive_int_multiply(qn->lower, tlen);
- if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
- len = v + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
- }
- else {
- len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;
- }
- break;
-
- case BAG_IF_ELSE:
- {
- Node* cond = NODE_BAG_BODY(node);
- Node* Then = node->te.Then;
- Node* Else = node->te.Else;
-
- len = compile_length_tree(cond, reg);
- if (len < 0) return len;
- len += SIZE_OP_PUSH;
- len += SIZE_OP_ATOMIC_START + SIZE_OP_ATOMIC_END;
-
- if (IS_NOT_NULL(Then)) {
- tlen = compile_length_tree(Then, reg);
- if (tlen < 0) return tlen;
- len += tlen;
- }
-
- len += SIZE_OP_JUMP + SIZE_OP_ATOMIC_END;
-
- if (IS_NOT_NULL(Else)) {
- tlen = compile_length_tree(Else, reg);
- if (tlen < 0) return tlen;
- len += tlen;
- }
- }
- break;
-
- case BAG_OPTION:
- /* never come here, but set for escape warning */
- len = 0;
- break;
- }
-
- return len;
-}
-
-static int get_char_len_node(Node* node, regex_t* reg, int* len);
-
-static int
-compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)
-{
- int r;
- int len;
-
-#ifdef USE_CALL
- if (NODE_IS_CALLED(node)) {
- r = add_op(reg, OP_CALL);
- if (r != 0) return r;
-
- node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + SIZE_OP_JUMP;
- NODE_STATUS_ADD(node, ADDR_FIXED);
- COP(reg)->call.addr = (int )node->m.called_addr;
-
- if (node->m.regnum == 0) {
- len = compile_length_tree(NODE_BAG_BODY(node), reg);
- len += SIZE_OP_RETURN;
-
- r = add_op(reg, OP_JUMP);
- if (r != 0) return r;
- COP(reg)->jump.addr = len + SIZE_INC_OP;
-
- r = compile_tree(NODE_BAG_BODY(node), reg, env);
- if (r != 0) return r;
-
- r = add_op(reg, OP_RETURN);
- return r;
- }
- else {
- len = compile_length_tree(NODE_BAG_BODY(node), reg);
- len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
- len += (NODE_IS_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
- else
- len += (NODE_IS_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
-
- r = add_op(reg, OP_JUMP);
- if (r != 0) return r;
- COP(reg)->jump.addr = len + SIZE_INC_OP;
- }
- }
-#endif
-
- if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
- r = add_op(reg, OP_MEMORY_START_PUSH);
- else
- r = add_op(reg, OP_MEMORY_START);
- if (r != 0) return r;
- COP(reg)->memory_start.num = node->m.regnum;
-
- r = compile_tree(NODE_BAG_BODY(node), reg, env);
- if (r != 0) return r;
-
-#ifdef USE_CALL
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
- r = add_op(reg, (NODE_IS_RECURSION(node)
- ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
- else
- r = add_op(reg, (NODE_IS_RECURSION(node) ? OP_MEMORY_END_REC : OP_MEMORY_END));
- if (r != 0) return r;
- COP(reg)->memory_end.num = node->m.regnum;
-
- if (NODE_IS_CALLED(node)) {
- if (r != 0) return r;
- r = add_op(reg, OP_RETURN);
- }
-#else
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
- r = add_op(reg, OP_MEMORY_END_PUSH);
- else
- r = add_op(reg, OP_MEMORY_END);
- if (r != 0) return r;
- COP(reg)->memory_end.num = node->m.regnum;
-#endif
-
- return r;
-}
-
-static int
-compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
-{
- int r, len;
-
- switch (node->type) {
- case BAG_MEMORY:
- r = compile_bag_memory_node(node, reg, env);
- break;
-
- case BAG_OPTION:
- r = compile_option_node(node, reg, env);
- break;
-
- case BAG_STOP_BACKTRACK:
- if (NODE_IS_STRICT_REAL_REPEAT(node)) {
- QuantNode* qn = QUANT_(NODE_BAG_BODY(node));
- r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
- if (r != 0) return r;
-
- len = compile_length_tree(NODE_QUANT_BODY(qn), reg);
- if (len < 0) return len;
-
- r = add_op(reg, OP_PUSH);
- if (r != 0) return r;
- COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
-
- r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
- if (r != 0) return r;
- r = add_op(reg, OP_POP_OUT);
- if (r != 0) return r;
-
- r = add_op(reg, OP_JUMP);
- if (r != 0) return r;
- COP(reg)->jump.addr = -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT);
- }
- else {
- r = add_op(reg, OP_ATOMIC_START);
- if (r != 0) return r;
- r = compile_tree(NODE_BAG_BODY(node), reg, env);
- if (r != 0) return r;
- r = add_op(reg, OP_ATOMIC_END);
- }
- break;
-
- case BAG_IF_ELSE:
- {
- int cond_len, then_len, else_len, jump_len;
- Node* cond = NODE_BAG_BODY(node);
- Node* Then = node->te.Then;
- Node* Else = node->te.Else;
-
- r = add_op(reg, OP_ATOMIC_START);
- if (r != 0) return r;
-
- cond_len = compile_length_tree(cond, reg);
- if (cond_len < 0) return cond_len;
- if (IS_NOT_NULL(Then)) {
- then_len = compile_length_tree(Then, reg);
- if (then_len < 0) return then_len;
- }
- else
- then_len = 0;
-
- jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END + SIZE_OP_JUMP;
-
- r = add_op(reg, OP_PUSH);
- if (r != 0) return r;
- COP(reg)->push.addr = SIZE_INC_OP + jump_len;
-
- r = compile_tree(cond, reg, env);
- if (r != 0) return r;
- r = add_op(reg, OP_ATOMIC_END);
- if (r != 0) return r;
-
- if (IS_NOT_NULL(Then)) {
- r = compile_tree(Then, reg, env);
- if (r != 0) return r;
- }
-
- if (IS_NOT_NULL(Else)) {
- else_len = compile_length_tree(Else, reg);
- if (else_len < 0) return else_len;
- }
- else
- else_len = 0;
-
- r = add_op(reg, OP_JUMP);
- if (r != 0) return r;
- COP(reg)->jump.addr = SIZE_OP_ATOMIC_END + else_len + SIZE_INC_OP;
-
- r = add_op(reg, OP_ATOMIC_END);
- if (r != 0) return r;
-
- if (IS_NOT_NULL(Else)) {
- r = compile_tree(Else, reg, env);
- }
- }
- break;
- }
-
- return r;
-}
-
-static int
-compile_length_anchor_node(AnchorNode* node, regex_t* reg)
-{
- int len;
- int tlen = 0;
-
- if (IS_NOT_NULL(NODE_ANCHOR_BODY(node))) {
- tlen = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
- if (tlen < 0) return tlen;
- }
-
- switch (node->type) {
- case ANCR_PREC_READ:
- len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;
- break;
- case ANCR_PREC_READ_NOT:
- len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END;
- break;
- case ANCR_LOOK_BEHIND:
- len = SIZE_OP_LOOK_BEHIND + tlen;
- break;
- case ANCR_LOOK_BEHIND_NOT:
- len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END;
- break;
-
- case ANCR_WORD_BOUNDARY:
- case ANCR_NO_WORD_BOUNDARY:
-#ifdef USE_WORD_BEGIN_END
- case ANCR_WORD_BEGIN:
- case ANCR_WORD_END:
-#endif
- len = SIZE_OP_WORD_BOUNDARY;
- break;
-
- case ANCR_TEXT_SEGMENT_BOUNDARY:
- case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
- len = SIZE_OPCODE;
- break;
-
- default:
- len = SIZE_OPCODE;
- break;
- }
-
- return len;
-}
-
-static int
-compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
-{
- int r, len;
- enum OpCode op;
-
- switch (node->type) {
- case ANCR_BEGIN_BUF: r = add_op(reg, OP_BEGIN_BUF); break;
- case ANCR_END_BUF: r = add_op(reg, OP_END_BUF); break;
- case ANCR_BEGIN_LINE: r = add_op(reg, OP_BEGIN_LINE); break;
- case ANCR_END_LINE: r = add_op(reg, OP_END_LINE); break;
- case ANCR_SEMI_END_BUF: r = add_op(reg, OP_SEMI_END_BUF); break;
- case ANCR_BEGIN_POSITION: r = add_op(reg, OP_BEGIN_POSITION); break;
-
- case ANCR_WORD_BOUNDARY:
- op = OP_WORD_BOUNDARY;
- word:
- r = add_op(reg, op);
- if (r != 0) return r;
- COP(reg)->word_boundary.mode = (ModeType )node->ascii_mode;
- break;
-
- case ANCR_NO_WORD_BOUNDARY:
- op = OP_NO_WORD_BOUNDARY; goto word;
- break;
-#ifdef USE_WORD_BEGIN_END
- case ANCR_WORD_BEGIN:
- op = OP_WORD_BEGIN; goto word;
- break;
- case ANCR_WORD_END:
- op = OP_WORD_END; goto word;
- break;
-#endif
-
- case ANCR_TEXT_SEGMENT_BOUNDARY:
- case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
- {
- enum TextSegmentBoundaryType type;
-
- r = add_op(reg, OP_TEXT_SEGMENT_BOUNDARY);
- if (r != 0) return r;
-
- type = EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
-#ifdef USE_UNICODE_WORD_BREAK
- if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_TEXT_SEGMENT_WORD))
- type = WORD_BOUNDARY;
-#endif
-
- COP(reg)->text_segment_boundary.type = type;
- COP(reg)->text_segment_boundary.not =
- (node->type == ANCR_NO_TEXT_SEGMENT_BOUNDARY ? 1 : 0);
- }
- break;
-
- case ANCR_PREC_READ:
- r = add_op(reg, OP_PREC_READ_START);
- if (r != 0) return r;
- r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
- if (r != 0) return r;
- r = add_op(reg, OP_PREC_READ_END);
- break;
-
- case ANCR_PREC_READ_NOT:
- len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
- if (len < 0) return len;
-
- r = add_op(reg, OP_PREC_READ_NOT_START);
- if (r != 0) return r;
- COP(reg)->prec_read_not_start.addr = SIZE_INC_OP + len + SIZE_OP_PREC_READ_NOT_END;
- r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
- if (r != 0) return r;
- r = add_op(reg, OP_PREC_READ_NOT_END);
- break;
-
- case ANCR_LOOK_BEHIND:
- {
- int n;
- r = add_op(reg, OP_LOOK_BEHIND);
- if (r != 0) return r;
- if (node->char_len < 0) {
- r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);
- if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- }
- else
- n = node->char_len;
-
- COP(reg)->look_behind.len = n;
- r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
- }
- break;
-
- case ANCR_LOOK_BEHIND_NOT:
- {
- int n;
-
- len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
- r = add_op(reg, OP_LOOK_BEHIND_NOT_START);
- if (r != 0) return r;
- COP(reg)->look_behind_not_start.addr = SIZE_INC_OP + len + SIZE_OP_LOOK_BEHIND_NOT_END;
-
- if (node->char_len < 0) {
- r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);
- if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- }
- else
- n = node->char_len;
-
- COP(reg)->look_behind_not_start.len = n;
-
- r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
- if (r != 0) return r;
- r = add_op(reg, OP_LOOK_BEHIND_NOT_END);
- }
- break;
-
- default:
- return ONIGERR_TYPE_BUG;
- break;
- }
-
- return r;
-}
-
-static int
-compile_gimmick_node(GimmickNode* node, regex_t* reg)
-{
- int r;
-
- switch (node->type) {
- case GIMMICK_FAIL:
- r = add_op(reg, OP_FAIL);
- break;
-
- case GIMMICK_SAVE:
- r = add_op(reg, OP_PUSH_SAVE_VAL);
- if (r != 0) return r;
- COP(reg)->push_save_val.type = node->detail_type;
- COP(reg)->push_save_val.id = node->id;
- break;
-
- case GIMMICK_UPDATE_VAR:
- r = add_op(reg, OP_UPDATE_VAR);
- if (r != 0) return r;
- COP(reg)->update_var.type = node->detail_type;
- COP(reg)->update_var.id = node->id;
- break;
-
-#ifdef USE_CALLOUT
- case GIMMICK_CALLOUT:
- switch (node->detail_type) {
- case ONIG_CALLOUT_OF_CONTENTS:
- case ONIG_CALLOUT_OF_NAME:
- {
- if (node->detail_type == ONIG_CALLOUT_OF_NAME) {
- r = add_op(reg, OP_CALLOUT_NAME);
- if (r != 0) return r;
- COP(reg)->callout_name.id = node->id;
- COP(reg)->callout_name.num = node->num;
- }
- else {
- r = add_op(reg, OP_CALLOUT_CONTENTS);
- if (r != 0) return r;
- COP(reg)->callout_contents.num = node->num;
- }
- }
- break;
-
- default:
- r = ONIGERR_TYPE_BUG;
- break;
- }
-#endif
- }
-
- return r;
-}
-
-static int
-compile_length_gimmick_node(GimmickNode* node, regex_t* reg)
-{
- int len;
-
- switch (node->type) {
- case GIMMICK_FAIL:
- len = SIZE_OP_FAIL;
- break;
-
- case GIMMICK_SAVE:
- len = SIZE_OP_PUSH_SAVE_VAL;
- break;
-
- case GIMMICK_UPDATE_VAR:
- len = SIZE_OP_UPDATE_VAR;
- break;
-
-#ifdef USE_CALLOUT
- case GIMMICK_CALLOUT:
- switch (node->detail_type) {
- case ONIG_CALLOUT_OF_CONTENTS:
- len = SIZE_OP_CALLOUT_CONTENTS;
- break;
- case ONIG_CALLOUT_OF_NAME:
- len = SIZE_OP_CALLOUT_NAME;
- break;
-
- default:
- len = ONIGERR_TYPE_BUG;
- break;
- }
- break;
-#endif
- }
-
- return len;
-}
-
-static int
-compile_length_tree(Node* node, regex_t* reg)
-{
- int len, r;
-
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- len = 0;
- do {
- r = compile_length_tree(NODE_CAR(node), reg);
- if (r < 0) return r;
- len += r;
- } while (IS_NOT_NULL(node = NODE_CDR(node)));
- r = len;
- break;
-
- case NODE_ALT:
- {
- int n;
-
- n = r = 0;
- do {
- r += compile_length_tree(NODE_CAR(node), reg);
- n++;
- } while (IS_NOT_NULL(node = NODE_CDR(node)));
- r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
- }
- break;
-
- case NODE_STRING:
- if (NODE_STRING_IS_RAW(node))
- r = compile_length_string_raw_node(STR_(node), reg);
- else
- r = compile_length_string_node(node, reg);
- break;
-
- case NODE_CCLASS:
- r = compile_length_cclass_node(CCLASS_(node), reg);
- break;
-
- case NODE_CTYPE:
- r = SIZE_OPCODE;
- break;
-
- case NODE_BACKREF:
- r = SIZE_OP_BACKREF;
- break;
-
-#ifdef USE_CALL
- case NODE_CALL:
- r = SIZE_OP_CALL;
- break;
-#endif
-
- case NODE_QUANT:
- r = compile_length_quantifier_node(QUANT_(node), reg);
- break;
-
- case NODE_BAG:
- r = compile_length_bag_node(BAG_(node), reg);
- break;
-
- case NODE_ANCHOR:
- r = compile_length_anchor_node(ANCHOR_(node), reg);
- break;
-
- case NODE_GIMMICK:
- r = compile_length_gimmick_node(GIMMICK_(node), reg);
- break;
-
- default:
- return ONIGERR_TYPE_BUG;
- break;
- }
-
- return r;
-}
-
-static int
-compile_tree(Node* node, regex_t* reg, ScanEnv* env)
-{
- int n, len, pos, r = 0;
-
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- do {
- r = compile_tree(NODE_CAR(node), reg, env);
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
- break;
-
- case NODE_ALT:
- {
- Node* x = node;
- len = 0;
- do {
- len += compile_length_tree(NODE_CAR(x), reg);
- if (IS_NOT_NULL(NODE_CDR(x))) {
- len += SIZE_OP_PUSH + SIZE_OP_JUMP;
- }
- } while (IS_NOT_NULL(x = NODE_CDR(x)));
- pos = COP_CURR_OFFSET(reg) + 1 + len; /* goal position */
-
- do {
- len = compile_length_tree(NODE_CAR(node), reg);
- if (IS_NOT_NULL(NODE_CDR(node))) {
- enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;
- r = add_op(reg, push);
- if (r != 0) break;
- COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_JUMP;
- }
- r = compile_tree(NODE_CAR(node), reg, env);
- if (r != 0) break;
- if (IS_NOT_NULL(NODE_CDR(node))) {
- len = pos - (COP_CURR_OFFSET(reg) + 1);
- r = add_op(reg, OP_JUMP);
- if (r != 0) break;
- COP(reg)->jump.addr = len;
- }
- } while (IS_NOT_NULL(node = NODE_CDR(node)));
- }
- break;
-
- case NODE_STRING:
- if (NODE_STRING_IS_RAW(node))
- r = compile_string_raw_node(STR_(node), reg);
- else
- r = compile_string_node(node, reg);
- break;
-
- case NODE_CCLASS:
- r = compile_cclass_node(CCLASS_(node), reg);
- break;
-
- case NODE_CTYPE:
- {
- int op;
-
- switch (CTYPE_(node)->ctype) {
- case CTYPE_ANYCHAR:
- r = add_op(reg, IS_MULTILINE(CTYPE_OPTION(node, reg)) ?
- OP_ANYCHAR_ML : OP_ANYCHAR);
- break;
-
- case ONIGENC_CTYPE_WORD:
- if (CTYPE_(node)->ascii_mode == 0) {
- op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD;
- }
- else {
- op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;
- }
- r = add_op(reg, op);
- break;
-
- default:
- return ONIGERR_TYPE_BUG;
- break;
- }
- }
- break;
-
- case NODE_BACKREF:
- {
- BackRefNode* br = BACKREF_(node);
-
- if (NODE_IS_CHECKER(node)) {
-#ifdef USE_BACKREF_WITH_LEVEL
- if (NODE_IS_NEST_LEVEL(node)) {
- r = add_op(reg, OP_BACKREF_CHECK_WITH_LEVEL);
- if (r != 0) return r;
- COP(reg)->backref_general.nest_level = br->nest_level;
- }
- else
-#endif
- {
- r = add_op(reg, OP_BACKREF_CHECK);
- if (r != 0) return r;
- }
- goto add_bacref_mems;
- }
- else {
-#ifdef USE_BACKREF_WITH_LEVEL
- if (NODE_IS_NEST_LEVEL(node)) {
- if ((reg->options & ONIG_OPTION_IGNORECASE) != 0)
- r = add_op(reg, OP_BACKREF_WITH_LEVEL_IC);
- else
- r = add_op(reg, OP_BACKREF_WITH_LEVEL);
-
- if (r != 0) return r;
- COP(reg)->backref_general.nest_level = br->nest_level;
- goto add_bacref_mems;
- }
- else
-#endif
- if (br->back_num == 1) {
- n = br->back_static[0];
- if (IS_IGNORECASE(reg->options)) {
- r = add_op(reg, OP_BACKREF_N_IC);
- if (r != 0) return r;
- COP(reg)->backref_n.n1 = n;
- }
- else {
- switch (n) {
- case 1: r = add_op(reg, OP_BACKREF1); break;
- case 2: r = add_op(reg, OP_BACKREF2); break;
- default:
- r = add_op(reg, OP_BACKREF_N);
- if (r != 0) return r;
- COP(reg)->backref_n.n1 = n;
- break;
- }
- }
- }
- else {
- int num;
- int* p;
-
- r = add_op(reg, IS_IGNORECASE(reg->options) ?
- OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI);
- if (r != 0) return r;
-
- add_bacref_mems:
- num = br->back_num;
- COP(reg)->backref_general.num = num;
- if (num == 1) {
- COP(reg)->backref_general.n1 = br->back_static[0];
- }
- else {
- int i, j;
- MemNumType* ns;
-
- ns = xmalloc(sizeof(MemNumType) * num);
- CHECK_NULL_RETURN_MEMERR(ns);
- COP(reg)->backref_general.ns = ns;
- p = BACKREFS_P(br);
- for (i = num - 1, j = 0; i >= 0; i--, j++) {
- ns[j] = p[i];
- }
- }
- }
- }
- }
- break;
-
-#ifdef USE_CALL
- case NODE_CALL:
- r = compile_call(CALL_(node), reg, env);
- break;
-#endif
-
- case NODE_QUANT:
- r = compile_quantifier_node(QUANT_(node), reg, env);
- break;
-
- case NODE_BAG:
- r = compile_bag_node(BAG_(node), reg, env);
- break;
-
- case NODE_ANCHOR:
- r = compile_anchor_node(ANCHOR_(node), reg, env);
- break;
-
- case NODE_GIMMICK:
- r = compile_gimmick_node(GIMMICK_(node), reg);
- break;
-
- default:
-#ifdef ONIG_DEBUG
- fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node));
-#endif
- break;
- }
-
- return r;
-}
-
-static int
-noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
-{
- int r = 0;
- Node* node = *plink;
-
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- case NODE_ALT:
- do {
- r = noname_disable_map(&(NODE_CAR(node)), map, counter);
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
- break;
-
- case NODE_QUANT:
- {
- Node** ptarget = &(NODE_BODY(node));
- Node* old = *ptarget;
- r = noname_disable_map(ptarget, map, counter);
- if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) {
- onig_reduce_nested_quantifier(node, *ptarget);
- }
- }
- break;
-
- case NODE_BAG:
- {
- BagNode* en = BAG_(node);
- if (en->type == BAG_MEMORY) {
- if (NODE_IS_NAMED_GROUP(node)) {
- (*counter)++;
- map[en->m.regnum].new_val = *counter;
- en->m.regnum = *counter;
- r = noname_disable_map(&(NODE_BODY(node)), map, counter);
- }
- else {
- *plink = NODE_BODY(node);
- NODE_BODY(node) = NULL_NODE;
- onig_node_free(node);
- r = noname_disable_map(plink, map, counter);
- }
- }
- else if (en->type == BAG_IF_ELSE) {
- r = noname_disable_map(&(NODE_BAG_BODY(en)), map, counter);
- if (r != 0) return r;
- if (IS_NOT_NULL(en->te.Then)) {
- r = noname_disable_map(&(en->te.Then), map, counter);
- if (r != 0) return r;
- }
- if (IS_NOT_NULL(en->te.Else)) {
- r = noname_disable_map(&(en->te.Else), map, counter);
- if (r != 0) return r;
- }
- }
- else
- r = noname_disable_map(&(NODE_BODY(node)), map, counter);
- }
- break;
-
- case NODE_ANCHOR:
- if (IS_NOT_NULL(NODE_BODY(node)))
- r = noname_disable_map(&(NODE_BODY(node)), map, counter);
- break;
-
- default:
- break;
- }
-
- return r;
-}
-
-static int
-renumber_node_backref(Node* node, GroupNumRemap* map)
-{
- int i, pos, n, old_num;
- int *backs;
- BackRefNode* bn = BACKREF_(node);
-
- if (! NODE_IS_BY_NAME(node))
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
-
- old_num = bn->back_num;
- if (IS_NULL(bn->back_dynamic))
- backs = bn->back_static;
- else
- backs = bn->back_dynamic;
-
- for (i = 0, pos = 0; i < old_num; i++) {
- n = map[backs[i]].new_val;
- if (n > 0) {
- backs[pos] = n;
- pos++;
- }
- }
-
- bn->back_num = pos;
- return 0;
-}
-
-static int
-renumber_by_map(Node* node, GroupNumRemap* map)
-{
- int r = 0;
-
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- case NODE_ALT:
- do {
- r = renumber_by_map(NODE_CAR(node), map);
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
- break;
-
- case NODE_QUANT:
- r = renumber_by_map(NODE_BODY(node), map);
- break;
-
- case NODE_BAG:
- {
- BagNode* en = BAG_(node);
-
- r = renumber_by_map(NODE_BODY(node), map);
- if (r != 0) return r;
-
- if (en->type == BAG_IF_ELSE) {
- if (IS_NOT_NULL(en->te.Then)) {
- r = renumber_by_map(en->te.Then, map);
- if (r != 0) return r;
- }
- if (IS_NOT_NULL(en->te.Else)) {
- r = renumber_by_map(en->te.Else, map);
- if (r != 0) return r;
- }
- }
- }
- break;
-
- case NODE_BACKREF:
- r = renumber_node_backref(node, map);
- break;
-
- case NODE_ANCHOR:
- if (IS_NOT_NULL(NODE_BODY(node)))
- r = renumber_by_map(NODE_BODY(node), map);
- break;
-
- default:
- break;
- }
-
- return r;
-}
-
-static int
-numbered_ref_check(Node* node)
-{
- int r = 0;
-
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- case NODE_ALT:
- do {
- r = numbered_ref_check(NODE_CAR(node));
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
- break;
-
- case NODE_ANCHOR:
- if (IS_NULL(NODE_BODY(node)))
- break;
- /* fall */
- case NODE_QUANT:
- r = numbered_ref_check(NODE_BODY(node));
- break;
-
- case NODE_BAG:
- {
- BagNode* en = BAG_(node);
-
- r = numbered_ref_check(NODE_BODY(node));
- if (r != 0) return r;
-
- if (en->type == BAG_IF_ELSE) {
- if (IS_NOT_NULL(en->te.Then)) {
- r = numbered_ref_check(en->te.Then);
- if (r != 0) return r;
- }
- if (IS_NOT_NULL(en->te.Else)) {
- r = numbered_ref_check(en->te.Else);
- if (r != 0) return r;
- }
- }
- }
-
- break;
-
- case NODE_BACKREF:
- if (! NODE_IS_BY_NAME(node))
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
- break;
-
- default:
- break;
- }
-
- return r;
-}
-
-/*
- * Drop the unnamed capture groups from the tree at *root and renumber the
- * remaining groups.
- *
- * A temporary GroupNumRemap table with env->num_mem + 1 entries is
- * allocated; entries 1..num_mem start with new_val == 0 and are filled in
- * by noname_disable_map().  Back references are rewritten through
- * renumber_by_map(), the MemEnv entries whose new_val is positive are
- * compacted to the front, the capture-history bits are re-mapped, num_mem
- * of both env and reg is set to env->num_named, and finally the name table
- * is renumbered.
- *
- * Returns 0 on success or the first non-zero code reported by a helper.
- * The remap table is released on every path once it has been allocated.
- */
-static int
-disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
-{
-  int r, i, pos, counter;
-  MemStatusType loc;
-  GroupNumRemap* map;
-
-  map = (GroupNumRemap* )xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));
-  CHECK_NULL_RETURN_MEMERR(map);
-  for (i = 1; i <= env->num_mem; i++) {
-    map[i].new_val = 0;
-  }
-  counter = 0;
-  r = noname_disable_map(root, map, &counter);
-  if (r != 0) goto end;   /* do not leak map on failure */
-
-  r = renumber_by_map(*root, map);
-  if (r != 0) goto end;
-
-  /* compact the surviving groups to the front of the MemEnv table */
-  for (i = 1, pos = 1; i <= env->num_mem; i++) {
-    if (map[i].new_val > 0) {
-      SCANENV_MEMENV(env)[pos] = SCANENV_MEMENV(env)[i];
-      pos++;
-    }
-  }
-
-  /* rebuild the capture-history bits under the new numbering */
-  loc = env->capture_history;
-  MEM_STATUS_CLEAR(env->capture_history);
-  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
-    if (MEM_STATUS_AT(loc, i)) {
-      MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val);
-    }
-  }
-
-  env->num_mem = env->num_named;
-  reg->num_mem = env->num_named;
-  r = onig_renumber_name_table(reg, map);
-
- end:
-  xfree(map);
-  return r;
-}
-
-#ifdef USE_CALL
-/*
- * Patch the recorded call sites with the address of their target group.
- *
- * For each entry of `uslist`, the target's called_addr is written into
- * reg->ops at the entry's byte offset.  Returns 0 when all entries were
- * patched, or ONIGERR_PARSER_BUG as soon as a target is found whose
- * address has not been fixed yet.
- */
-static int
-fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)
-{
-  int n;
-  AbsAddrType* slot;
-
-  n = 0;
-  while (n < uslist->num) {
-    if (! NODE_IS_ADDR_FIXED(uslist->us[n].target))
-      return ONIGERR_PARSER_BUG;
-
-    /* the offset is in bytes from the start of the operation array */
-    slot = (AbsAddrType* )((char* )reg->ops + uslist->us[n].offset);
-    *slot = BAG_(uslist->us[n].target)->m.called_addr;
-    n++;
-  }
-
-  return 0;
-}
-#endif
-
-
-#define GET_CHAR_LEN_VARLEN -1
-#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
-
-/*
- * Compute the character length of a fixed-size pattern node.
- *
- * On success returns 0 and stores the length in *len (which is reset to 0
- * on entry).  Returns GET_CHAR_LEN_VARLEN when the node has no single
- * length, or GET_CHAR_LEN_TOP_ALT_VARLEN when the branches of an
- * alternation seen at level 1 differ in length.  `level` is incremented on
- * entry, so the node handed in with level 0 is processed at level 1.
- * Any non-zero result from a recursive call is passed back unchanged.
- */
-static int
-get_char_len_node1(Node* node, regex_t* reg, int* len, int level)
-{
- int tlen;
- int r = 0;
-
- level++;
- *len = 0;
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- do {
- r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level);
- if (r == 0)
- *len = distance_add(*len, tlen); /* a list is the sum of its elements */
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
- break;
-
- case NODE_ALT:
- {
- int tlen2;
- int varlen = 0;
-
- r = get_char_len_node1(NODE_CAR(node), reg, &tlen, level);
- while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) {
- r = get_char_len_node1(NODE_CAR(node), reg, &tlen2, level);
- if (r == 0) {
- if (tlen != tlen2) /* every branch is compared with the first one */
- varlen = 1;
- }
- }
- if (r == 0) {
- if (varlen != 0) {
- if (level == 1) /* only the outermost node gets the distinct code */
- r = GET_CHAR_LEN_TOP_ALT_VARLEN;
- else
- r = GET_CHAR_LEN_VARLEN;
- }
- else
- *len = tlen;
- }
- }
- break;
-
- case NODE_STRING:
- {
- StrNode* sn = STR_(node);
- UChar *s = sn->s;
-
- while (s < sn->end) {
- s += enclen(reg->enc, s); /* advance by one encoded character */
- (*len)++;
- }
- }
- break;
-
- case NODE_QUANT:
- {
- QuantNode* qn = QUANT_(node);
-
- if (qn->lower == qn->upper) { /* only an exact repeat count is fixed-length */
- if (qn->upper == 0) {
- *len = 0;
- }
- else {
- r = get_char_len_node1(NODE_BODY(node), reg, &tlen, level);
- if (r == 0)
- *len = distance_multiply(tlen, qn->lower);
- }
- }
- else
- r = GET_CHAR_LEN_VARLEN;
- }
- break;
-
-#ifdef USE_CALL
- case NODE_CALL:
- if (! NODE_IS_RECURSION(node))
- r = get_char_len_node1(NODE_BODY(node), reg, len, level);
- else
- r = GET_CHAR_LEN_VARLEN;
- break;
-#endif
-
- case NODE_CTYPE:
- case NODE_CCLASS:
- *len = 1;
- break;
-
- case NODE_BAG:
- {
- BagNode* en = BAG_(node);
-
- switch (en->type) {
- case BAG_MEMORY: /* without USE_CALL this label shares the BAG_OPTION code below */
-#ifdef USE_CALL
- if (NODE_IS_CLEN_FIXED(node))
- *len = en->char_len; /* reuse the length cached by an earlier visit */
- else {
- r = get_char_len_node1(NODE_BODY(node), reg, len, level);
- if (r == 0) {
- en->char_len = *len;
- NODE_STATUS_ADD(node, CLEN_FIXED);
- }
- }
- break;
-#endif
- case BAG_OPTION:
- case BAG_STOP_BACKTRACK:
- r = get_char_len_node1(NODE_BODY(node), reg, len, level);
- break;
- case BAG_IF_ELSE:
- {
- int clen, elen;
-
- r = get_char_len_node1(NODE_BODY(node), reg, &clen, level);
- if (r == 0) {
- if (IS_NOT_NULL(en->te.Then)) {
- r = get_char_len_node1(en->te.Then, reg, &tlen, level);
- if (r != 0) break;
- }
- else tlen = 0;
- if (IS_NOT_NULL(en->te.Else)) {
- r = get_char_len_node1(en->te.Else, reg, &elen, level);
- if (r != 0) break;
- }
- else elen = 0;
-
- if (clen + tlen != elen) { /* condition + Then must be as long as Else */
- r = GET_CHAR_LEN_VARLEN;
- }
- else {
- *len = elen;
- }
- }
- }
- break;
- }
- }
- break;
-
- case NODE_ANCHOR:
- case NODE_GIMMICK:
- break;
-
- case NODE_BACKREF:
- if (NODE_IS_CHECKER(node))
- break;
- /* fall through: a non-checker back reference is reported as variable length */
- default:
- r = GET_CHAR_LEN_VARLEN;
- break;
- }
-
- return r;
-}
-
-/*
- * Entry point for the fixed-length computation: same contract as
- * get_char_len_node1(), started at nesting level 0.
- */
-static int
-get_char_len_node(Node* node, regex_t* reg, int* len)
-{
-  int status;
-
-  /* level 0 here: the callee increments it, so `node` itself is level 1 */
-  status = get_char_len_node1(node, reg, len, 0);
-  return status;
-}
-
-/* x is not included y ==> 1 : 0
- *
- * Compares two head-value nodes (character type, character class or
- * string).  Returns 1 only when one of the checks below establishes that
- * the two cannot match the same thing; every unhandled or undecidable
- * combination returns 0.  When the kind of x has no rule for the kind of
- * y, the two are swapped and the switch is re-entered through `retry`.
- */
-static int
-is_exclusive(Node* x, Node* y, regex_t* reg)
-{
- int i, len;
- OnigCodePoint code;
- UChar *p;
- NodeType ytype;
-
- retry:
- ytype = NODE_TYPE(y);
- switch (NODE_TYPE(x)) {
- case NODE_CTYPE:
- {
- /* NOTE(review): y is read as a ctype node here before ytype has been examined — confirm this is intended */
- if (CTYPE_(x)->ctype == CTYPE_ANYCHAR ||
- CTYPE_(y)->ctype == CTYPE_ANYCHAR)
- break;
-
- switch (ytype) {
- case NODE_CTYPE:
- /* same type and same ascii_mode, but opposite negation */
- if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&
- CTYPE_(y)->not != CTYPE_(x)->not &&
- CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)
- return 1;
- else
- return 0;
- break;
-
- case NODE_CCLASS:
- swap:
- {
- Node* tmp;
- tmp = x; x = y; y = tmp; /* let the other operand's case handle the pair */
- goto retry;
- }
- break;
-
- case NODE_STRING:
- goto swap;
- break;
-
- default:
- break;
- }
- }
- break;
-
- case NODE_CCLASS:
- {
- int range;
- CClassNode* xc = CCLASS_(x);
-
- switch (ytype) {
- case NODE_CTYPE:
- switch (CTYPE_(y)->ctype) {
- case CTYPE_ANYCHAR:
- return 0;
- break;
-
- case ONIGENC_CTYPE_WORD:
- if (CTYPE_(y)->not == 0) {
- /* y is "word": exclusive when no bit set in the class bitset is a word code */
- if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
- range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
- for (i = 0; i < range; i++) {
- if (BITSET_AT(xc->bs, i)) {
- if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
- }
- }
- return 1;
- }
- return 0;
- }
- else {
- /* y is "not word": exclusive when every bit set in the class bitset is a word code */
- if (IS_NOT_NULL(xc->mbuf)) return 0;
- if (IS_NCCLASS_NOT(xc)) return 0;
-
- range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
- for (i = 0; i < range; i++) {
- if (! ONIGENC_IS_CODE_WORD(reg->enc, i)) {
- if (BITSET_AT(xc->bs, i))
- return 0;
- }
- }
- for (i = range; i < SINGLE_BYTE_SIZE; i++) { /* codes at or above `range` must not be set at all */
- if (BITSET_AT(xc->bs, i)) return 0;
- }
- return 1;
- }
- break;
-
- default:
- break;
- }
- break;
-
- case NODE_CCLASS:
- {
- int v;
- CClassNode* yc = CCLASS_(y);
-
- /* any single-byte code accepted by both classes (negation applied) means overlap */
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- v = BITSET_AT(xc->bs, i);
- if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) {
- v = BITSET_AT(yc->bs, i);
- if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
- (v == 0 && IS_NCCLASS_NOT(yc)))
- return 0;
- }
- }
- /* no bitset overlap: exclusive if at least one class has no mbuf and is not negated */
- if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
- (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
- return 1;
- return 0;
- }
- break;
-
- case NODE_STRING:
- goto swap;
- break;
-
- default:
- break;
- }
- }
- break;
-
- case NODE_STRING:
- {
- StrNode* xs = STR_(x);
-
- if (NODE_STRING_LEN(x) == 0)
- break;
-
- switch (ytype) {
- case NODE_CTYPE:
- switch (CTYPE_(y)->ctype) {
- case CTYPE_ANYCHAR:
- break;
-
- case ONIGENC_CTYPE_WORD:
- /* test the word-ness of the character at the start of the string against y's negation */
- if (CTYPE_(y)->ascii_mode == 0) {
- if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
- return CTYPE_(y)->not;
- else
- return !(CTYPE_(y)->not);
- }
- else {
- if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end))
- return CTYPE_(y)->not;
- else
- return !(CTYPE_(y)->not);
- }
- break;
- default:
- break;
- }
- break;
-
- case NODE_CCLASS:
- {
- CClassNode* cc = CCLASS_(y);
-
- /* NOTE(review): the end bound passed here is s + MBC_MAXLEN, not xs->end — confirm it cannot read past the string */
- code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
- xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
- return onig_is_code_in_cc(reg->enc, code, cc) == 0;
- }
- break;
-
- case NODE_STRING:
- {
- UChar *q;
- StrNode* ys = STR_(y);
-
- len = NODE_STRING_LEN(x);
- if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y); /* compare the common prefix only */
- if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) {
- /* tiny version */
- return 0;
- }
- else {
- for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
- if (*p != *q) return 1;
- }
- }
- }
- break;
-
- default:
- break;
- }
- }
- break;
-
- default:
- break;
- }
-
- return 0;
-}
-
-/*
- * Find the node that supplies the first value matched by `node`.
- *
- * Returns the character-type, character-class or string node found at the
- * head of the tree, or NULL_NODE when none can be determined.  With
- * exact != 0 only string nodes qualify, and a string is accepted only if
- * the current options are not ignore-case or the string is raw.
- * reg->options is temporarily replaced while descending into an option
- * bag and restored afterwards.
- */
-static Node*
-get_head_value_node(Node* node, int exact, regex_t* reg)
-{
-  Node* head = NULL_NODE;
-  StrNode* str;
-  QuantNode* quant;
-  BagNode* bag;
-  OnigOptionType saved_options;
-
-  switch (NODE_TYPE(node)) {
-  case NODE_CTYPE:
-    /* "any character" never yields a usable head value */
-    if (CTYPE_(node)->ctype != CTYPE_ANYCHAR && exact == 0)
-      head = node;
-    break;
-
-  case NODE_CCLASS:
-    if (exact == 0)
-      head = node;
-    break;
-
-  case NODE_LIST:
-    head = get_head_value_node(NODE_CAR(node), exact, reg);
-    break;
-
-  case NODE_STRING:
-    str = STR_(node);
-    if (str->end > str->s &&
-        (exact == 0 ||
-         ! IS_IGNORECASE(reg->options) || NODE_STRING_IS_RAW(node)))
-      head = node;
-    break;
-
-  case NODE_QUANT:
-    quant = QUANT_(node);
-    if (qn_lower_positive_guard: 0) {}
-    break;
-
-  default:
-    break;
-  }
-
-  return head;
-}
-
-/*
- * Verify that a tree contains only permitted node kinds.
- *
- * Returns 1 as soon as a node is found whose type bit is missing from
- * type_mask, a bag whose (1 << type) is missing from bag_mask, or an
- * anchor whose type shares no bit with anchor_mask; returns 0 when the
- * whole tree passes.
- */
-static int
-check_type_tree(Node* node, int type_mask, int bag_mask, int anchor_mask)
-{
-  NodeType type;
-  Node* item;
-  BagNode* bag;
-  int invalid = 0;
-
-  type = NODE_TYPE(node);
-  if ((NODE_TYPE2BIT(type) & type_mask) == 0)
-    return 1;
-
-  switch (type) {
-  case NODE_LIST:
-  case NODE_ALT:
-    for (item = node; IS_NOT_NULL(item); item = NODE_CDR(item)) {
-      invalid = check_type_tree(NODE_CAR(item), type_mask, bag_mask, anchor_mask);
-      if (invalid != 0) break;
-    }
-    break;
-
-  case NODE_QUANT:
-    invalid = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);
-    break;
-
-  case NODE_BAG:
-    bag = BAG_(node);
-    if (((1<<bag->type) & bag_mask) == 0)
-      return 1;
-
-    invalid = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);
-    if (invalid != 0 || bag->type != BAG_IF_ELSE)
-      break;
-
-    /* an if-else bag additionally owns a Then and an Else subtree */
-    if (IS_NOT_NULL(bag->te.Then))
-      invalid = check_type_tree(bag->te.Then, type_mask, bag_mask, anchor_mask);
-    if (invalid == 0 && IS_NOT_NULL(bag->te.Else))
-      invalid = check_type_tree(bag->te.Else, type_mask, bag_mask, anchor_mask);
-    break;
-
-  case NODE_ANCHOR:
-    /* the variable is reused to hold the anchor's own type value */
-    type = ANCHOR_(node)->type;
-    if ((type & anchor_mask) == 0)
-      return 1;
-
-    if (IS_NOT_NULL(NODE_BODY(node)))
-      invalid = check_type_tree(NODE_BODY(node), type_mask, bag_mask, anchor_mask);
-    break;
-
-  case NODE_GIMMICK:
-  default:
-    break;
-  }
-
-  return invalid;
-}
-
-/*
- * Compute a lower bound for the length matched by `node`.
- *
- * Lists add up their elements with distance_add(); alternations and
- * multi-target back references take the smallest candidate; a quantifier
- * multiplies its body by the lower bound (0 when the lower bound is 0).
- * A memory bag caches its result in en->min_len (MIN_FIXED) and uses MARK1
- * to detect that it is being re-entered, in which case 0 is used.
- * A GIMMICK_FAIL node yields INFINITE_LEN.
- *
- * The if-else bag combines the condition and the Then arm with
- * distance_add() so that an INFINITE_LEN operand saturates instead of
- * wrapping around, matching what tree_max_len() and the list case do.
- */
-static OnigLen
-tree_min_len(Node* node, ScanEnv* env)
-{
-  OnigLen len;
-  OnigLen tmin;
-
-  len = 0;
-  switch (NODE_TYPE(node)) {
-  case NODE_BACKREF:
-    if (! NODE_IS_CHECKER(node)) {
-      int i;
-      int* backs;
-      MemEnv* mem_env = SCANENV_MEMENV(env);
-      BackRefNode* br = BACKREF_(node);
-      if (NODE_IS_RECURSION(node)) break;
-
-      /* the shortest referenced group bounds the back reference */
-      backs = BACKREFS_P(br);
-      len = tree_min_len(mem_env[backs[0]].node, env);
-      for (i = 1; i < br->back_num; i++) {
-        tmin = tree_min_len(mem_env[backs[i]].node, env);
-        if (len > tmin) len = tmin;
-      }
-    }
-    break;
-
-#ifdef USE_CALL
-  case NODE_CALL:
-    {
-      Node* t = NODE_BODY(node);
-      if (NODE_IS_RECURSION(node)) {
-        /* only a value already cached on the target can be used here */
-        if (NODE_IS_MIN_FIXED(t))
-          len = BAG_(t)->min_len;
-      }
-      else
-        len = tree_min_len(t, env);
-    }
-    break;
-#endif
-
-  case NODE_LIST:
-    do {
-      tmin = tree_min_len(NODE_CAR(node), env);
-      len = distance_add(len, tmin);
-    } while (IS_NOT_NULL(node = NODE_CDR(node)));
-    break;
-
-  case NODE_ALT:
-    {
-      Node *x, *y;
-      y = node;
-      do {
-        x = NODE_CAR(y);
-        tmin = tree_min_len(x, env);
-        if (y == node) len = tmin;          /* first branch seeds the minimum */
-        else if (len > tmin) len = tmin;
-      } while (IS_NOT_NULL(y = NODE_CDR(y)));
-    }
-    break;
-
-  case NODE_STRING:
-    {
-      StrNode* sn = STR_(node);
-      len = (OnigLen )(sn->end - sn->s);
-    }
-    break;
-
-  case NODE_CTYPE:
-  case NODE_CCLASS:
-    len = ONIGENC_MBC_MINLEN(env->enc);
-    break;
-
-  case NODE_QUANT:
-    {
-      QuantNode* qn = QUANT_(node);
-
-      if (qn->lower > 0) {
-        len = tree_min_len(NODE_BODY(node), env);
-        len = distance_multiply(len, qn->lower);
-      }
-    }
-    break;
-
-  case NODE_BAG:
-    {
-      BagNode* en = BAG_(node);
-      switch (en->type) {
-      case BAG_MEMORY:
-        if (NODE_IS_MIN_FIXED(node))
-          len = en->min_len;
-        else {
-          if (NODE_IS_MARK1(node))
-            len = 0;  /* recursive */
-          else {
-            NODE_STATUS_ADD(node, MARK1);
-            len = tree_min_len(NODE_BODY(node), env);
-            NODE_STATUS_REMOVE(node, MARK1);
-
-            en->min_len = len;
-            NODE_STATUS_ADD(node, MIN_FIXED);
-          }
-        }
-        break;
-
-      case BAG_OPTION:
-      case BAG_STOP_BACKTRACK:
-        len = tree_min_len(NODE_BODY(node), env);
-        break;
-      case BAG_IF_ELSE:
-        {
-          OnigLen elen;
-
-          len = tree_min_len(NODE_BODY(node), env);
-          if (IS_NOT_NULL(en->te.Then)) {
-            tmin = tree_min_len(en->te.Then, env);
-            len = distance_add(len, tmin);  /* saturating, never wraps */
-          }
-          if (IS_NOT_NULL(en->te.Else))
-            elen = tree_min_len(en->te.Else, env);
-          else elen = 0;
-
-          if (elen < len) len = elen;
-        }
-        break;
-      }
-    }
-    break;
-
-  case NODE_GIMMICK:
-    {
-      GimmickNode* g = GIMMICK_(node);
-      if (g->type == GIMMICK_FAIL) {
-        len = INFINITE_LEN;
-        break;
-      }
-    }
-    /* fall */
-  case NODE_ANCHOR:
-  default:
-    break;
-  }
-
-  return len;
-}
-/*
- * Compute an upper bound for the length matched by `node`.
- *
- * Lists add up their elements with distance_add(); alternations and
- * multi-target back references take the largest candidate.  INFINITE_LEN
- * is returned for recursive back references and calls, for a quantifier
- * with an infinite upper bound over a body of non-zero length, and for a
- * memory bag that is re-entered while MARK1 is set.  A memory bag caches
- * its result in en->max_len (MAX_FIXED).
- */
-static OnigLen
-tree_max_len(Node* node, ScanEnv* env)
-{
- OnigLen len;
- OnigLen tmax;
-
- len = 0;
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- do {
- tmax = tree_max_len(NODE_CAR(node), env);
- len = distance_add(len, tmax);
- } while (IS_NOT_NULL(node = NODE_CDR(node)));
- break;
-
- case NODE_ALT:
- do {
- tmax = tree_max_len(NODE_CAR(node), env);
- if (len < tmax) len = tmax; /* keep the longest branch */
- } while (IS_NOT_NULL(node = NODE_CDR(node)));
- break;
-
- case NODE_STRING:
- {
- StrNode* sn = STR_(node);
- len = (OnigLen )(sn->end - sn->s);
- }
- break;
-
- case NODE_CTYPE:
- case NODE_CCLASS:
- len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
- break;
-
- case NODE_BACKREF:
- if (! NODE_IS_CHECKER(node)) {
- int i;
- int* backs;
- MemEnv* mem_env = SCANENV_MEMENV(env);
- BackRefNode* br = BACKREF_(node);
- if (NODE_IS_RECURSION(node)) {
- len = INFINITE_LEN;
- break;
- }
- backs = BACKREFS_P(br);
- for (i = 0; i < br->back_num; i++) {
- tmax = tree_max_len(mem_env[backs[i]].node, env);
- if (len < tmax) len = tmax; /* longest referenced group wins */
- }
- }
- break;
-
-#ifdef USE_CALL
- case NODE_CALL:
- if (! NODE_IS_RECURSION(node))
- len = tree_max_len(NODE_BODY(node), env);
- else
- len = INFINITE_LEN;
- break;
-#endif
-
- case NODE_QUANT:
- {
- QuantNode* qn = QUANT_(node);
-
- if (qn->upper != 0) {
- len = tree_max_len(NODE_BODY(node), env);
- if (len != 0) { /* a zero-length body stays zero however often it repeats */
- if (! IS_INFINITE_REPEAT(qn->upper))
- len = distance_multiply(len, qn->upper);
- else
- len = INFINITE_LEN;
- }
- }
- }
- break;
-
- case NODE_BAG:
- {
- BagNode* en = BAG_(node);
- switch (en->type) {
- case BAG_MEMORY:
- if (NODE_IS_MAX_FIXED(node))
- len = en->max_len;
- else {
- if (NODE_IS_MARK1(node)) /* re-entered while its own body is being measured */
- len = INFINITE_LEN;
- else {
- NODE_STATUS_ADD(node, MARK1);
- len = tree_max_len(NODE_BODY(node), env);
- NODE_STATUS_REMOVE(node, MARK1);
-
- en->max_len = len;
- NODE_STATUS_ADD(node, MAX_FIXED);
- }
- }
- break;
-
- case BAG_OPTION:
- case BAG_STOP_BACKTRACK:
- len = tree_max_len(NODE_BODY(node), env);
- break;
- case BAG_IF_ELSE:
- {
- OnigLen tlen, elen;
-
- len = tree_max_len(NODE_BODY(node), env);
- if (IS_NOT_NULL(en->te.Then)) {
- tlen = tree_max_len(en->te.Then, env);
- len = distance_add(len, tlen); /* condition followed by Then */
- }
- if (IS_NOT_NULL(en->te.Else))
- elen = tree_max_len(en->te.Else, env);
- else elen = 0;
-
- if (elen > len) len = elen;
- }
- break;
- }
- }
- break;
-
- case NODE_ANCHOR:
- case NODE_GIMMICK:
- default:
- break;
- }
-
- return len;
-}
-
-/*
- * Validate every back reference in the tree and flag the referenced
- * groups.
- *
- * For each NODE_BACKREF, every target number must not exceed
- * env->num_mem, otherwise ONIGERR_INVALID_BACKREF is returned; each valid
- * target's group node gets the BACKREF status bit.  Container nodes are
- * walked recursively and the walk stops at the first non-zero result.
- * Returns 0 when the whole tree is valid.
- */
-static int
-check_backrefs(Node* node, ScanEnv* env)
-{
-  int status = 0;
-  int k;
-  int* refs;
-  Node* item;
-  BagNode* bag;
-  BackRefNode* br;
-  MemEnv* groups;
-
-  switch (NODE_TYPE(node)) {
-  case NODE_LIST:
-  case NODE_ALT:
-    for (item = node; IS_NOT_NULL(item); item = NODE_CDR(item)) {
-      status = check_backrefs(NODE_CAR(item), env);
-      if (status != 0) break;
-    }
-    break;
-
-  case NODE_ANCHOR:
-    if (ANCHOR_HAS_BODY(ANCHOR_(node)))
-      status = check_backrefs(NODE_BODY(node), env);
-    break;
-
-  case NODE_QUANT:
-    status = check_backrefs(NODE_BODY(node), env);
-    break;
-
-  case NODE_BAG:
-    bag = BAG_(node);
-    status = check_backrefs(NODE_BODY(node), env);
-    if (status == 0 && bag->type == BAG_IF_ELSE) {
-      if (IS_NOT_NULL(bag->te.Then))
-        status = check_backrefs(bag->te.Then, env);
-      if (status == 0 && IS_NOT_NULL(bag->te.Else))
-        status = check_backrefs(bag->te.Else, env);
-    }
-    break;
-
-  case NODE_BACKREF:
-    br = BACKREF_(node);
-    refs = BACKREFS_P(br);
-    groups = SCANENV_MEMENV(env);
-
-    for (k = 0; k < br->back_num; k++) {
-      if (refs[k] > env->num_mem) {
-        status = ONIGERR_INVALID_BACKREF;
-        break;
-      }
-      /* remember on the group that something refers back to it */
-      NODE_STATUS_ADD(groups[refs[k]].node, BACKREF);
-    }
-    break;
-
-  default:
-    break;
-  }
-
-  return status;
-}
-
-
-#ifdef USE_CALL
-/* Result bits of infinite_recursive_call_check(); they are OR-ed together. */
-#define RECURSION_EXIST (1<<0) /* the MARK1 group was reached again */
-#define RECURSION_MUST (1<<1) /* ...and not only through an optional path or a single alternative */
-#define RECURSION_INFINITE (1<<2) /* ...and it was reached while `head` was still non-zero */
-/*
- * Examine the body of a group that the caller has flagged MARK1 and report
- * how that group is reached again from inside itself.
- *
- * `head` is non-zero as long as no element with a non-zero minimum length
- * (per tree_min_len) has been passed on the current path.  Reaching the
- * MARK1 group yields EXIST|MUST, plus INFINITE when head is non-zero.
- * Returns an OR of the RECURSION_* bits, 0 when the group is not reached,
- * or a negative value passed up from a nested call.  MARK2 guards memory
- * bags that are already being examined further up the call chain.
- */
-static int
-infinite_recursive_call_check(Node* node, ScanEnv* env, int head)
-{
- int ret;
- int r = 0;
-
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- {
- Node *x;
- OnigLen min;
-
- x = node;
- do {
- ret = infinite_recursive_call_check(NODE_CAR(x), env, head);
- if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
- r |= ret;
- if (head != 0) {
- min = tree_min_len(NODE_CAR(x), env);
- if (min != 0) head = 0; /* later elements are no longer at the head */
- }
- } while (IS_NOT_NULL(x = NODE_CDR(x)));
- }
- break;
-
- case NODE_ALT:
- {
- int must;
-
- must = RECURSION_MUST;
- do {
- ret = infinite_recursive_call_check(NODE_CAR(node), env, head);
- if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
-
- r |= (ret & RECURSION_EXIST);
- must &= ret; /* MUST survives only if every branch reports it */
- } while (IS_NOT_NULL(node = NODE_CDR(node)));
- r |= must;
- }
- break;
-
- case NODE_QUANT:
- r = infinite_recursive_call_check(NODE_BODY(node), env, head);
- if (r < 0) return r;
- if ((r & RECURSION_MUST) != 0) {
- if (QUANT_(node)->lower == 0) /* the body may be skipped entirely */
- r &= ~RECURSION_MUST;
- }
- break;
-
- case NODE_ANCHOR:
- if (! ANCHOR_HAS_BODY(ANCHOR_(node)))
- break;
- /* fall through: an anchor with a body is examined like a call target */
- case NODE_CALL:
- r = infinite_recursive_call_check(NODE_BODY(node), env, head);
- break;
-
- case NODE_BAG:
- {
- BagNode* en = BAG_(node);
-
- if (en->type == BAG_MEMORY) {
- if (NODE_IS_MARK2(node)) /* already being examined further up: contributes nothing */
- return 0;
- else if (NODE_IS_MARK1(node)) /* back at the group under test */
- return (head == 0 ? RECURSION_EXIST | RECURSION_MUST
- : RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE);
- else {
- NODE_STATUS_ADD(node, MARK2);
- r = infinite_recursive_call_check(NODE_BODY(node), env, head);
- NODE_STATUS_REMOVE(node, MARK2);
- }
- }
- else if (en->type == BAG_IF_ELSE) {
- int eret;
-
- ret = infinite_recursive_call_check(NODE_BODY(node), env, head);
- if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
- r |= ret;
- if (IS_NOT_NULL(en->te.Then)) {
- OnigLen min;
- if (head != 0) {
- min = tree_min_len(NODE_BODY(node), env);
- }
- else min = 0;
-
- /* Then follows the condition, so it is at the head only if the condition has zero minimum length */
- ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head);
- if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
- r |= ret;
- }
- if (IS_NOT_NULL(en->te.Else)) {
- eret = infinite_recursive_call_check(en->te.Else, env, head);
- if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret;
- r |= (eret & RECURSION_EXIST);
- if ((eret & RECURSION_MUST) == 0)
- r &= ~RECURSION_MUST;
- }
- }
- else {
- r = infinite_recursive_call_check(NODE_BODY(node), env, head);
- }
- }
- break;
-
- default:
- break;
- }
-
- return r;
-}
-/*
- * Traverse the whole tree and run infinite_recursive_call_check() on the
- * body of every memory bag that is flagged both RECURSION and CALLED.
- *
- * Such a bag is marked MARK1 for the duration of its check.  Returns
- * ONIGERR_NEVER_ENDING_RECURSION when the check reports RECURSION_MUST or
- * RECURSION_INFINITE, a negative value passed up from the check, or 0
- * when nothing was found.
- */
-static int
-infinite_recursive_call_check_trav(Node* node, ScanEnv* env)
-{
- int r;
-
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- case NODE_ALT:
- do {
- r = infinite_recursive_call_check_trav(NODE_CAR(node), env);
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
- break;
-
- case NODE_ANCHOR:
- if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
- r = 0;
- break;
- }
- /* fall through: traverse the anchor body like a quantifier body */
- case NODE_QUANT:
- r = infinite_recursive_call_check_trav(NODE_BODY(node), env);
- break;
-
- case NODE_BAG:
- {
- BagNode* en = BAG_(node);
-
- if (en->type == BAG_MEMORY) {
- if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) {
- int ret;
-
- NODE_STATUS_ADD(node, MARK1);
-
- ret = infinite_recursive_call_check(NODE_BODY(node), env, 1); /* 1: start at the head of the group */
- if (ret < 0) return ret; /* MARK1 is left set on the two error returns */
- else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0)
- return ONIGERR_NEVER_ENDING_RECURSION;
-
- NODE_STATUS_REMOVE(node, MARK1);
- }
- }
- else if (en->type == BAG_IF_ELSE) {
- if (IS_NOT_NULL(en->te.Then)) {
- r = infinite_recursive_call_check_trav(en->te.Then, env);
- if (r != 0) return r;
- }
- if (IS_NOT_NULL(en->te.Else)) {
- r = infinite_recursive_call_check_trav(en->te.Else, env);
- if (r != 0) return r;
- }
- }
- }
-
- r = infinite_recursive_call_check_trav(NODE_BODY(node), env); /* every bag kind: continue into the body */
- break;
-
- default:
- r = 0;
- break;
- }
-
- return r;
-}
-
-/*
- * Report whether the memory bag currently flagged MARK1 is reachable from
- * `node`.
- *
- * Returns non-zero when it is, 0 otherwise.  A call node whose target is
- * the MARK1 bag and whose body reports recursion gets the RECURSION status
- * bit.  Memory bags already on the current path (MARK2) contribute 0;
- * every element of a list or alternation is visited, without early exit.
- */
-static int
-recursive_call_check(Node* node)
-{
-  int found = 0;
-  Node* item;
-  BagNode* bag;
-
-  switch (NODE_TYPE(node)) {
-  case NODE_LIST:
-  case NODE_ALT:
-    for (item = node; IS_NOT_NULL(item); item = NODE_CDR(item))
-      found |= recursive_call_check(NODE_CAR(item));
-    break;
-
-  case NODE_ANCHOR:
-    if (ANCHOR_HAS_BODY(ANCHOR_(node)))
-      found = recursive_call_check(NODE_BODY(node));
-    break;
-
-  case NODE_QUANT:
-    found = recursive_call_check(NODE_BODY(node));
-    break;
-
-  case NODE_CALL:
-    found = recursive_call_check(NODE_BODY(node));
-    if (found != 0 && NODE_IS_MARK1(NODE_BODY(node)))
-      NODE_STATUS_ADD(node, RECURSION);
-    break;
-
-  case NODE_BAG:
-    bag = BAG_(node);
-    if (bag->type == BAG_MEMORY) {
-      if (NODE_IS_MARK2(node))
-        return 0;
-      if (NODE_IS_MARK1(node))
-        return 1; /* recursion */
-
-      /* guard this bag while its body is being examined */
-      NODE_STATUS_ADD(node, MARK2);
-      found = recursive_call_check(NODE_BODY(node));
-      NODE_STATUS_REMOVE(node, MARK2);
-    }
-    else if (bag->type == BAG_IF_ELSE) {
-      if (IS_NOT_NULL(bag->te.Then))
-        found |= recursive_call_check(bag->te.Then);
-      if (IS_NOT_NULL(bag->te.Else))
-        found |= recursive_call_check(bag->te.Else);
-      found |= recursive_call_check(NODE_BODY(node));
-    }
-    else {
-      found = recursive_call_check(NODE_BODY(node));
-    }
-    break;
-
-  default:
-    break;
-  }
-
-  return found;
-}
-/* IN_RECURSION is a bit of the `state` argument; FOUND_CALLED_NODE is a return value. */
-#define IN_RECURSION (1<<0)
-#define FOUND_CALLED_NODE 1
-/*
- * Traverse the tree, flag recursive memory bags, and report whether a
- * called memory bag lies inside `node`.
- *
- * A memory bag that is CALLED, or that is met while IN_RECURSION is set,
- * and is not yet flagged RECURSION is marked MARK1 and examined with
- * recursive_call_check(); a non-zero result flags it RECURSION.  The body
- * (and the Then/Else arms) of a bag flagged RECURSION are traversed with
- * IN_RECURSION added to the state.  A quantifier with upper == 0 whose
- * body contains a called bag gets is_refered = 1.
- * Returns FOUND_CALLED_NODE, 0, or a negative value passed up from a list
- * or alternation element.
- */
-static int
-recursive_call_check_trav(Node* node, ScanEnv* env, int state)
-{
- int r = 0;
-
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- case NODE_ALT:
- {
- int ret;
- do {
- ret = recursive_call_check_trav(NODE_CAR(node), env, state);
- if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
- else if (ret < 0) return ret;
- } while (IS_NOT_NULL(node = NODE_CDR(node)));
- }
- break;
-
- case NODE_QUANT:
- r = recursive_call_check_trav(NODE_BODY(node), env, state);
- if (QUANT_(node)->upper == 0) {
- if (r == FOUND_CALLED_NODE)
- QUANT_(node)->is_refered = 1; /* a {0} body that contains a called group */
- }
- break;
-
- case NODE_ANCHOR:
- {
- AnchorNode* an = ANCHOR_(node);
- if (ANCHOR_HAS_BODY(an))
- r = recursive_call_check_trav(NODE_ANCHOR_BODY(an), env, state);
- }
- break;
-
- case NODE_BAG:
- {
- int ret;
- int state1;
- BagNode* en = BAG_(node);
-
- if (en->type == BAG_MEMORY) {
- if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {
- if (! NODE_IS_RECURSION(node)) {
- NODE_STATUS_ADD(node, MARK1); /* target marker read by recursive_call_check() */
- r = recursive_call_check(NODE_BODY(node));
- if (r != 0)
- NODE_STATUS_ADD(node, RECURSION);
- NODE_STATUS_REMOVE(node, MARK1);
- }
-
- if (NODE_IS_CALLED(node))
- r = FOUND_CALLED_NODE;
- }
- }
-
- state1 = state;
- if (NODE_IS_RECURSION(node))
- state1 |= IN_RECURSION;
-
- ret = recursive_call_check_trav(NODE_BODY(node), env, state1);
- if (ret == FOUND_CALLED_NODE)
- r = FOUND_CALLED_NODE;
-
- if (en->type == BAG_IF_ELSE) {
- if (IS_NOT_NULL(en->te.Then)) {
- ret = recursive_call_check_trav(en->te.Then, env, state1);
- if (ret == FOUND_CALLED_NODE)
- r = FOUND_CALLED_NODE;
- }
- if (IS_NOT_NULL(en->te.Else)) {
- ret = recursive_call_check_trav(en->te.Else, env, state1);
- if (ret == FOUND_CALLED_NODE)
- r = FOUND_CALLED_NODE;
- }
- }
- }
- break;
-
- default:
- break;
- }
-
- return r;
-}
-
-#endif
-/*
- * Bit flags for an `int state` argument.
- * NOTE(review): in the code that follows, expand_case_fold_string() tests
- * IN_LOOK_BEHIND; the users of the other bits are further down the file.
- * Presumably each bit records the kind of construct that encloses the node
- * being processed — confirm against those users.
- */
-#define IN_ALT (1<<0)
-#define IN_NOT (1<<1)
-#define IN_REAL_REPEAT (1<<2)
-#define IN_VAR_REPEAT (1<<3)
-#define IN_ZERO_REPEAT (1<<4)
-#define IN_MULTI_ENTRY (1<<5)
-#define IN_LOOK_BEHIND (1<<6)
-
-
-/* divide different length alternatives in look-behind.
- (?<=A|B) ==> (?<=A)|(?<=B)
- (?<!A|B) ==> (?<!A)(?<!B)
-
- `node` is the look-behind anchor whose body is an alternation.  The tree
- is rewritten in place: after swap_node() `node` is the head of the
- alternation and each branch is wrapped in its own anchor of the original
- type and ascii_mode.  For ANCR_LOOK_BEHIND_NOT the alternation cells are
- then retyped as NODE_LIST.  Returns 0; if allocating a wrapper anchor
- fails the function returns through CHECK_NULL_RETURN_MEMERR with only the
- branches processed so far wrapped.
- NOTE(review): presumably swap_node() exchanges the contents of its two
- arguments — confirm against its definition.
-*/
-static int
-divide_look_behind_alternatives(Node* node)
-{
- Node *head, *np, *insert_node;
- AnchorNode* an = ANCHOR_(node);
- int anc_type = an->type;
-
- head = NODE_ANCHOR_BODY(an);
- np = NODE_CAR(head);
- swap_node(node, head);
- NODE_CAR(node) = head; /* first branch: reuse the swapped-out cell as its anchor */
- NODE_BODY(head) = np;
-
- np = node;
- while (IS_NOT_NULL(np = NODE_CDR(np))) { /* wrap every remaining branch */
- insert_node = onig_node_new_anchor(anc_type, an->ascii_mode);
- CHECK_NULL_RETURN_MEMERR(insert_node);
- NODE_BODY(insert_node) = NODE_CAR(np);
- NODE_CAR(np) = insert_node;
- }
-
- if (anc_type == ANCR_LOOK_BEHIND_NOT) {
- np = node;
- do {
- NODE_SET_TYPE(np, NODE_LIST); /* alt -> list */
- } while (IS_NOT_NULL(np = NODE_CDR(np)));
- }
- return 0;
-}
-
-/*
- * Determine the character length of a look-behind body and store it in
- * the anchor.
- *
- * Returns 0 after setting an->char_len when the body has a fixed length.
- * A variable-length body yields ONIGERR_INVALID_LOOK_BEHIND_PATTERN,
- * except that an outermost alternation with branches of different length
- * is split with divide_look_behind_alternatives() when the syntax enables
- * ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND.  Any other result of the length
- * computation is returned unchanged.
- */
-static int
-setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
-{
-  int char_len;
-  int status;
-  AnchorNode* anchor = ANCHOR_(node);
-
-  status = get_char_len_node(NODE_ANCHOR_BODY(anchor), reg, &char_len);
-  switch (status) {
-  case 0:
-    anchor->char_len = char_len;
-    break;
-
-  case GET_CHAR_LEN_VARLEN:
-    status = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
-    break;
-
-  case GET_CHAR_LEN_TOP_ALT_VARLEN:
-    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
-      status = divide_look_behind_alternatives(node);
-    else
-      status = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
-    break;
-
-  default:
-    break;
-  }
-
-  return status;
-}
-/*
- * Use the node that follows `node` to optimise a greedy, unbounded
- * quantifier.
- *
- * With USE_QUANT_PEEK_NEXT the exact head value of `next_node` is stored
- * in qn->next_head_exact, unless its first byte is '\0'.  When the lower
- * bound is at most 1, the body is a strict real node, and the head values
- * of the body and of `next_node` are exclusive, the quantifier is wrapped
- * in a BAG_STOP_BACKTRACK bag.  A memory bag is looked through by
- * retrying on its body.  Returns 0, or leaves through
- * CHECK_NULL_RETURN_MEMERR when the bag cannot be allocated.
- */
-static int
-next_setup(Node* node, Node* next_node, regex_t* reg)
-{
- NodeType type;
-
- retry:
- type = NODE_TYPE(node);
- if (type == NODE_QUANT) {
- QuantNode* qn = QUANT_(node);
- if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) {
-#ifdef USE_QUANT_PEEK_NEXT
- Node* n = get_head_value_node(next_node, 1, reg); /* exact == 1: only string nodes are returned */
- /* '\0': for UTF-16BE etc... */
- if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {
- qn->next_head_exact = n;
- }
-#endif
- /* automatic possessification: a*b ==> (?>a*)b */
- if (qn->lower <= 1) {
- if (is_strict_real_node(NODE_BODY(node))) {
- Node *x, *y;
- x = get_head_value_node(NODE_BODY(node), 0, reg);
- if (IS_NOT_NULL(x)) {
- y = get_head_value_node(next_node, 0, reg);
- if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {
- Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK);
- CHECK_NULL_RETURN_MEMERR(en);
- NODE_STATUS_ADD(en, STRICT_REAL_REPEAT);
- swap_node(node, en); /* presumably `node` now holds the bag and `en` the quantifier — confirm swap_node */
- NODE_BODY(node) = en;
- }
- }
- }
- }
- }
- }
- else if (type == NODE_BAG) {
- BagNode* en = BAG_(node);
- if (en->type == BAG_MEMORY) {
- node = NODE_BODY(node); /* look through the capture group */
- goto retry;
- }
- }
- return 0;
-}
-
-
-/*
- * Replace the contents of a string node with its case-folded form.
- *
- * The folded bytes are collected in a temporary buffer that starts at
- * twice the length of the string and is doubled whenever it fills up,
- * then copied into the node with onig_node_str_set().
- *
- * Returns 0 on success, ONIGERR_MEMORY when the buffer cannot be
- * allocated or grown, or the non-zero result of onig_node_str_set().
- * The temporary buffer is released on every path; in particular the old
- * buffer is freed when growing it fails, since the reallocation result is
- * held in a separate pointer until it is known to be valid.
- */
-static int
-update_string_node_case_fold(regex_t* reg, Node *node)
-{
-  UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
-  UChar *sbuf, *ebuf, *sp, *grown;
-  int r, i, len, sbuf_size;
-  StrNode* sn = STR_(node);
-
-  end = sn->end;
-  sbuf_size = (int )(end - sn->s) * 2;
-  sbuf = (UChar* )xmalloc(sbuf_size);
-  CHECK_NULL_RETURN_MEMERR(sbuf);
-  ebuf = sbuf + sbuf_size;
-
-  sp = sbuf;
-  p = sn->s;
-  while (p < end) {
-    /* folds one character; p is advanced by the callee */
-    len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
-    for (i = 0; i < len; i++) {
-      if (sp >= ebuf) {
-        grown = (UChar* )xrealloc(sbuf, sbuf_size * 2, sbuf_size);
-        if (IS_NULL(grown)) {
-          xfree(sbuf);   /* the original block is still ours to release */
-          return ONIGERR_MEMORY;
-        }
-        sbuf = grown;
-        sp = sbuf + sbuf_size;   /* the old buffer was exactly full */
-        sbuf_size *= 2;
-        ebuf = sbuf + sbuf_size;
-      }
-
-      *sp++ = buf[i];
-    }
-  }
-
-  r = onig_node_str_set(node, sbuf, sp);
-  xfree(sbuf);
-  return r;
-}
-
-/*
- * Build a case-folded string node for the bytes in [s, end).
- *
- * On success the new node, flagged ambiguous and "don't get opt info", is
- * stored in *rnode and 0 is returned.  Returns ONIGERR_MEMORY when the
- * node cannot be created, or the non-zero result of the case-fold step;
- * in both cases *rnode is left untouched and nothing is leaked.
- */
-static int
-expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* reg)
-{
-  int status;
-  Node *folded;
-
-  folded = onig_node_new_str(s, end);
-  if (IS_NULL(folded))
-    return ONIGERR_MEMORY;
-
-  status = update_string_node_case_fold(reg, folded);
-  if (status == 0) {
-    NODE_STRING_SET_AMBIG(folded);
-    NODE_STRING_SET_DONT_GET_OPT_INFO(folded);
-    *rnode = folded;
-  }
-  else {
-    onig_node_free(folded);
-  }
-
-  return status;
-}
-/*
- * Build an alternation that matches the `slen` bytes at `p` or any of the
- * `item_num` case-fold variants in `items`.
- *
- * Variants whose byte_len equals slen are chained after the original
- * string.  When at least one variant has a different byte_len (varlen), an
- * outer alternation is created: its first branch is a list holding the
- * same-length alternation, and each different-length variant becomes a
- * further branch, followed by the case-folded remainder of the string up
- * to `end` when the variant stops before `end`.
- *
- * Returns varlen (0 or 1) with the tree in *rnode, or ONIGERR_MEMORY on
- * any failure.  NOTE(review): a negative length from ONIGENC_CODE_TO_MBC
- * is stored in r but ONIGERR_MEMORY is what gets returned; and on the
- * error labels *rnode is freed without being reset.
- */
-static int
-expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p,
- int slen, UChar *end, regex_t* reg, Node **rnode)
-{
- int r, i, j;
- int len;
- int varlen;
- Node *anode, *var_anode, *snode, *xnode, *an;
- UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
-
- *rnode = var_anode = NULL_NODE;
-
- varlen = 0;
- for (i = 0; i < item_num; i++) {
- if (items[i].byte_len != slen) {
- varlen = 1;
- break;
- }
- }
-
- if (varlen != 0) {
- *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
-
- xnode = onig_node_new_list(NULL, NULL);
- if (IS_NULL(xnode)) goto mem_err;
- NODE_CAR(var_anode) = xnode;
-
- anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(anode)) goto mem_err;
- NODE_CAR(xnode) = anode;
- }
- else {
- *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(anode)) return ONIGERR_MEMORY;
- }
-
- snode = onig_node_new_str(p, p + slen); /* first branch: the original bytes */
- if (IS_NULL(snode)) goto mem_err;
-
- NODE_CAR(anode) = snode;
-
- for (i = 0; i < item_num; i++) {
- snode = onig_node_new_str(NULL, NULL);
- if (IS_NULL(snode)) goto mem_err;
-
- for (j = 0; j < items[i].code_len; j++) { /* encode every code point of this variant */
- len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
- if (len < 0) {
- r = len;
- goto mem_err2;
- }
-
- r = onig_node_str_cat(snode, buf, buf + len);
- if (r != 0) goto mem_err2;
- }
-
- an = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(an)) {
- goto mem_err2;
- }
- //The IS_NOT_NULL(var_anode) test below is redundant and is kept only to
- //satisfy static analysis: var_anode can be NULL only when varlen == 0,
- //i.e. when every items[i].byte_len equals slen. When varlen != 0 the
- //allocation of var_anode was already checked (ONIGERR_MEMORY is returned
- //on failure) before this loop is entered.
- if (items[i].byte_len != slen && IS_NOT_NULL(var_anode)) {
- Node *rem;
- UChar *q = p + items[i].byte_len;
-
- if (q < end) {
- r = expand_case_fold_make_rem_string(&rem, q, end, reg);
- if (r != 0) {
- onig_node_free(an);
- goto mem_err2;
- }
-
- xnode = onig_node_list_add(NULL_NODE, snode);
- if (IS_NULL(xnode)) {
- onig_node_free(an);
- onig_node_free(rem);
- goto mem_err2;
- }
- if (IS_NULL(onig_node_list_add(xnode, rem))) {
- onig_node_free(an);
- onig_node_free(xnode); /* xnode already holds snode, hence mem_err rather than mem_err2 */
- onig_node_free(rem);
- goto mem_err;
- }
-
- NODE_CAR(an) = xnode;
- }
- else {
- NODE_CAR(an) = snode;
- }
-
- NODE_CDR(var_anode) = an; /* append to the outer (different-length) chain */
- var_anode = an;
- }
- else {
- NODE_CAR(an) = snode;
- NODE_CDR(anode) = an; /* append to the same-length chain */
- anode = an;
- }
- }
-
- return varlen;
-
- mem_err2:
- onig_node_free(snode);
-
- mem_err:
- onig_node_free(*rnode);
-
- return ONIGERR_MEMORY;
-}
-
-/*
- * Tell whether every case-fold item is a single code point that occupies
- * exactly `slen` bytes.
- *
- * Returns 1 when, for each of the `n` items, code_len is 1, byte_len is
- * slen, and encoding code[0] with `enc` also yields slen bytes; returns 0
- * at the first item that fails one of these tests.
- */
-static int
-is_good_case_fold_items_for_search(OnigEncoding enc, int slen,
-                                   int n, OnigCaseFoldCodeItem items[])
-{
-  int k;
-  UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
-
-  for (k = 0; k < n; k++) {
-    if (items[k].code_len != 1 || items[k].byte_len != slen)
-      return 0;
-
-    /* the encoded form must be as long as the source character */
-    if (ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf) != slen)
-      return 0;
-  }
-
-  return 1;
-}
-
-#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
-
-static int
-expand_case_fold_string(Node* node, regex_t* reg, int state)
-{
- int r, n, len, alt_num;
- int fold_len;
- int prev_is_ambig, prev_is_good, is_good, is_in_look_behind;
- UChar *start, *end, *p;
- UChar* foldp;
- Node *top_root, *root, *snode, *prev_node;
- OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
- UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
- StrNode* sn;
-
- if (NODE_STRING_IS_AMBIG(node)) return 0;
-
- sn = STR_(node);
-
- start = sn->s;
- end = sn->end;
- if (start >= end) return 0;
-
- is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
-
- r = 0;
- top_root = root = prev_node = snode = NULL_NODE;
- alt_num = 1;
- p = start;
- while (p < end) {
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
- p, end, items);
- if (n < 0) {
- r = n;
- goto err;
- }
-
- len = enclen(reg->enc, p);
- is_good = is_good_case_fold_items_for_search(reg->enc, len, n, items);
-
- if (is_in_look_behind ||
- (IS_NOT_NULL(snode) ||
- (is_good
- /* expand single char case: ex. /(?i:a)/ */
- && !(p == start && p + len >= end)))) {
- if (IS_NULL(snode)) {
- if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- prev_node = snode = onig_node_new_str(NULL, NULL);
- if (IS_NULL(snode)) goto mem_err;
- if (IS_NOT_NULL(root)) {
- if (IS_NULL(onig_node_list_add(root, snode))) {
- onig_node_free(snode);
- goto mem_err;
- }
- }
-
- prev_is_ambig = -1; /* -1: new */
- prev_is_good = 0; /* escape compiler warning */
- }
- else {
- prev_is_ambig = NODE_STRING_IS_AMBIG(snode);
- prev_is_good = NODE_STRING_IS_GOOD_AMBIG(snode);
- }
-
- if (n != 0) {
- foldp = p;
- fold_len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag,
- &foldp, end, buf);
- foldp = buf;
- }
- else {
- foldp = p; fold_len = len;
- }
-
- if ((prev_is_ambig == 0 && n != 0) ||
- (prev_is_ambig > 0 && (n == 0 || prev_is_good != is_good))) {
- if (IS_NULL(root) /* && IS_NOT_NULL(prev_node) */) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- prev_node = snode = onig_node_new_str(foldp, foldp + fold_len);
- if (IS_NULL(snode)) goto mem_err;
- if (IS_NULL(onig_node_list_add(root, snode))) {
- onig_node_free(snode);
- goto mem_err;
- }
- }
- else {
- r = onig_node_str_cat(snode, foldp, foldp + fold_len);
- if (r != 0) goto err;
- }
-
- if (n != 0) NODE_STRING_SET_AMBIG(snode);
- if (is_good != 0) NODE_STRING_SET_GOOD_AMBIG(snode);
- }
- else {
- alt_num *= (n + 1);
- if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
-
- if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
- if (r < 0) goto mem_err;
- if (r == 1) {
- if (IS_NULL(root)) {
- top_root = prev_node;
- }
- else {
- if (IS_NULL(onig_node_list_add(root, prev_node))) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- root = NODE_CAR(prev_node);
- }
- else { /* r == 0 */
- if (IS_NOT_NULL(root)) {
- if (IS_NULL(onig_node_list_add(root, prev_node))) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
- }
-
- snode = NULL_NODE;
- }
-
- p += len;
- }
-
- if (p < end) {
- Node *srem;
-
- r = expand_case_fold_make_rem_string(&srem, p, end, reg);
- if (r != 0) goto mem_err;
-
- if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(srem);
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- if (IS_NULL(root)) {
- prev_node = srem;
- }
- else {
- if (IS_NULL(onig_node_list_add(root, srem))) {
- onig_node_free(srem);
- goto mem_err;
- }
- }
- }
-
- /* ending */
- top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
- swap_node(node, top_root);
- onig_node_free(top_root);
- return 0;
-
- mem_err:
- r = ONIGERR_MEMORY;
-
- err:
- onig_node_free(top_root);
- return r;
-}
-
-#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
-static enum BodyEmptyType
-quantifiers_memory_node_info(Node* node)
-{
- int r = BODY_IS_EMPTY_POSSIBILITY;
-
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- case NODE_ALT:
- {
- int v;
- do {
- v = quantifiers_memory_node_info(NODE_CAR(node));
- if (v > r) r = v;
- } while (IS_NOT_NULL(node = NODE_CDR(node)));
- }
- break;
-
-#ifdef USE_CALL
- case NODE_CALL:
- if (NODE_IS_RECURSION(node)) {
- return BODY_IS_EMPTY_POSSIBILITY_REC; /* tiny version */
- }
- else
- r = quantifiers_memory_node_info(NODE_BODY(node));
- break;
-#endif
-
- case NODE_QUANT:
- {
- QuantNode* qn = QUANT_(node);
- if (qn->upper != 0) {
- r = quantifiers_memory_node_info(NODE_BODY(node));
- }
- }
- break;
-
- case NODE_BAG:
- {
- BagNode* en = BAG_(node);
- switch (en->type) {
- case BAG_MEMORY:
- if (NODE_IS_RECURSION(node)) {
- return BODY_IS_EMPTY_POSSIBILITY_REC;
- }
- return BODY_IS_EMPTY_POSSIBILITY_MEM;
- break;
-
- case BAG_OPTION:
- case BAG_STOP_BACKTRACK:
- r = quantifiers_memory_node_info(NODE_BODY(node));
- break;
- case BAG_IF_ELSE:
- {
- int v;
- r = quantifiers_memory_node_info(NODE_BODY(node));
- if (IS_NOT_NULL(en->te.Then)) {
- v = quantifiers_memory_node_info(en->te.Then);
- if (v > r) r = v;
- }
- if (IS_NOT_NULL(en->te.Else)) {
- v = quantifiers_memory_node_info(en->te.Else);
- if (v > r) r = v;
- }
- }
- break;
- }
- }
- break;
-
- case NODE_BACKREF:
- case NODE_STRING:
- case NODE_CTYPE:
- case NODE_CCLASS:
- case NODE_ANCHOR:
- case NODE_GIMMICK:
- default:
- break;
- }
-
- return r;
-}
-#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
-
-
-#ifdef USE_CALL
-
-#ifdef __GNUC__
-__inline
-#endif
-static int
-setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
-{
- MemEnv* mem_env = SCANENV_MEMENV(env);
-
- if (cn->by_number != 0) {
- int gnum = cn->group_num;
-
- if (env->num_named > 0 &&
- IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
- ! ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) {
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
- }
-
- if (gnum > env->num_mem) {
- onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,
- cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_GROUP_REFERENCE;
- }
-
- set_call_attr:
- NODE_CALL_BODY(cn) = mem_env[cn->group_num].node;
- if (IS_NULL(NODE_CALL_BODY(cn))) {
- onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
- cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_NAME_REFERENCE;
- }
- }
- else {
- int *refs;
-
- int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
- if (n <= 0) {
- onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
- cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_NAME_REFERENCE;
- }
- else if (n > 1) {
- onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,
- cn->name, cn->name_end);
- return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
- }
- else {
- cn->group_num = refs[0];
- goto set_call_attr;
- }
- }
-
- return 0;
-}
-
-static void
-setup_call2_call(Node* node)
-{
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- case NODE_ALT:
- do {
- setup_call2_call(NODE_CAR(node));
- } while (IS_NOT_NULL(node = NODE_CDR(node)));
- break;
-
- case NODE_QUANT:
- setup_call2_call(NODE_BODY(node));
- break;
-
- case NODE_ANCHOR:
- if (ANCHOR_HAS_BODY(ANCHOR_(node)))
- setup_call2_call(NODE_BODY(node));
- break;
-
- case NODE_BAG:
- {
- BagNode* en = BAG_(node);
-
- if (en->type == BAG_MEMORY) {
- if (! NODE_IS_MARK1(node)) {
- NODE_STATUS_ADD(node, MARK1);
- setup_call2_call(NODE_BODY(node));
- NODE_STATUS_REMOVE(node, MARK1);
- }
- }
- else if (en->type == BAG_IF_ELSE) {
- setup_call2_call(NODE_BODY(node));
- if (IS_NOT_NULL(en->te.Then))
- setup_call2_call(en->te.Then);
- if (IS_NOT_NULL(en->te.Else))
- setup_call2_call(en->te.Else);
- }
- else {
- setup_call2_call(NODE_BODY(node));
- }
- }
- break;
-
- case NODE_CALL:
- if (! NODE_IS_MARK1(node)) {
- NODE_STATUS_ADD(node, MARK1);
- {
- CallNode* cn = CALL_(node);
- Node* called = NODE_CALL_BODY(cn);
-
- cn->entry_count++;
-
- NODE_STATUS_ADD(called, CALLED);
- BAG_(called)->m.entry_count++;
- setup_call2_call(called);
- }
- NODE_STATUS_REMOVE(node, MARK1);
- }
- break;
-
- default:
- break;
- }
-}
-
-static int
-setup_call(Node* node, ScanEnv* env, int state)
-{
- int r;
-
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- case NODE_ALT:
- do {
- r = setup_call(NODE_CAR(node), env, state);
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
- break;
-
- case NODE_QUANT:
- if (QUANT_(node)->upper == 0)
- state |= IN_ZERO_REPEAT;
-
- r = setup_call(NODE_BODY(node), env, state);
- break;
-
- case NODE_ANCHOR:
- if (ANCHOR_HAS_BODY(ANCHOR_(node)))
- r = setup_call(NODE_BODY(node), env, state);
- else
- r = 0;
- break;
-
- case NODE_BAG:
- {
- BagNode* en = BAG_(node);
-
- if (en->type == BAG_MEMORY) {
- if ((state & IN_ZERO_REPEAT) != 0) {
- NODE_STATUS_ADD(node, IN_ZERO_REPEAT);
- BAG_(node)->m.entry_count--;
- }
- r = setup_call(NODE_BODY(node), env, state);
- }
- else if (en->type == BAG_IF_ELSE) {
- r = setup_call(NODE_BODY(node), env, state);
- if (r != 0) return r;
- if (IS_NOT_NULL(en->te.Then)) {
- r = setup_call(en->te.Then, env, state);
- if (r != 0) return r;
- }
- if (IS_NOT_NULL(en->te.Else))
- r = setup_call(en->te.Else, env, state);
- }
- else
- r = setup_call(NODE_BODY(node), env, state);
- }
- break;
-
- case NODE_CALL:
- if ((state & IN_ZERO_REPEAT) != 0) {
- NODE_STATUS_ADD(node, IN_ZERO_REPEAT);
- CALL_(node)->entry_count--;
- }
-
- r = setup_call_node_call(CALL_(node), env, state);
- break;
-
- default:
- r = 0;
- break;
- }
-
- return r;
-}
-
-static int
-setup_call2(Node* node)
-{
- int r = 0;
-
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- case NODE_ALT:
- do {
- r = setup_call2(NODE_CAR(node));
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
- break;
-
- case NODE_QUANT:
- if (QUANT_(node)->upper != 0)
- r = setup_call2(NODE_BODY(node));
- break;
-
- case NODE_ANCHOR:
- if (ANCHOR_HAS_BODY(ANCHOR_(node)))
- r = setup_call2(NODE_BODY(node));
- break;
-
- case NODE_BAG:
- if (! NODE_IS_IN_ZERO_REPEAT(node))
- r = setup_call2(NODE_BODY(node));
-
- {
- BagNode* en = BAG_(node);
-
- if (r != 0) return r;
- if (en->type == BAG_IF_ELSE) {
- if (IS_NOT_NULL(en->te.Then)) {
- r = setup_call2(en->te.Then);
- if (r != 0) return r;
- }
- if (IS_NOT_NULL(en->te.Else))
- r = setup_call2(en->te.Else);
- }
- }
- break;
-
- case NODE_CALL:
- if (! NODE_IS_IN_ZERO_REPEAT(node)) {
- setup_call2_call(node);
- }
- break;
-
- default:
- break;
- }
-
- return r;
-}
-
-
-static void
-setup_called_state_call(Node* node, int state)
-{
- switch (NODE_TYPE(node)) {
- case NODE_ALT:
- state |= IN_ALT;
- /* fall */
- case NODE_LIST:
- do {
- setup_called_state_call(NODE_CAR(node), state);
- } while (IS_NOT_NULL(node = NODE_CDR(node)));
- break;
-
- case NODE_QUANT:
- {
- QuantNode* qn = QUANT_(node);
-
- if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
- state |= IN_REAL_REPEAT;
- if (qn->lower != qn->upper)
- state |= IN_VAR_REPEAT;
-
- setup_called_state_call(NODE_QUANT_BODY(qn), state);
- }
- break;
-
- case NODE_ANCHOR:
- {
- AnchorNode* an = ANCHOR_(node);
-
- switch (an->type) {
- case ANCR_PREC_READ_NOT:
- case ANCR_LOOK_BEHIND_NOT:
- state |= IN_NOT;
- /* fall */
- case ANCR_PREC_READ:
- case ANCR_LOOK_BEHIND:
- setup_called_state_call(NODE_ANCHOR_BODY(an), state);
- break;
- default:
- break;
- }
- }
- break;
-
- case NODE_BAG:
- {
- BagNode* en = BAG_(node);
-
- if (en->type == BAG_MEMORY) {
- if (NODE_IS_MARK1(node)) {
- if ((~en->m.called_state & state) != 0) {
- en->m.called_state |= state;
- setup_called_state_call(NODE_BODY(node), state);
- }
- }
- else {
- NODE_STATUS_ADD(node, MARK1);
- en->m.called_state |= state;
- setup_called_state_call(NODE_BODY(node), state);
- NODE_STATUS_REMOVE(node, MARK1);
- }
- }
- else if (en->type == BAG_IF_ELSE) {
- if (IS_NOT_NULL(en->te.Then)) {
- setup_called_state_call(en->te.Then, state);
- }
- if (IS_NOT_NULL(en->te.Else))
- setup_called_state_call(en->te.Else, state);
- }
- else {
- setup_called_state_call(NODE_BODY(node), state);
- }
- }
- break;
-
- case NODE_CALL:
- setup_called_state_call(NODE_BODY(node), state);
- break;
-
- default:
- break;
- }
-}
-
-static void
-setup_called_state(Node* node, int state)
-{
- switch (NODE_TYPE(node)) {
- case NODE_ALT:
- state |= IN_ALT;
- /* fall */
- case NODE_LIST:
- do {
- setup_called_state(NODE_CAR(node), state);
- } while (IS_NOT_NULL(node = NODE_CDR(node)));
- break;
-
-#ifdef USE_CALL
- case NODE_CALL:
- setup_called_state_call(node, state);
- break;
-#endif
-
- case NODE_BAG:
- {
- BagNode* en = BAG_(node);
-
- switch (en->type) {
- case BAG_MEMORY:
- if (en->m.entry_count > 1)
- state |= IN_MULTI_ENTRY;
-
- en->m.called_state |= state;
- /* fall */
- case BAG_OPTION:
- case BAG_STOP_BACKTRACK:
- setup_called_state(NODE_BODY(node), state);
- break;
- case BAG_IF_ELSE:
- setup_called_state(NODE_BODY(node), state);
- if (IS_NOT_NULL(en->te.Then))
- setup_called_state(en->te.Then, state);
- if (IS_NOT_NULL(en->te.Else))
- setup_called_state(en->te.Else, state);
- break;
- }
- }
- break;
-
- case NODE_QUANT:
- {
- QuantNode* qn = QUANT_(node);
-
- if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
- state |= IN_REAL_REPEAT;
- if (qn->lower != qn->upper)
- state |= IN_VAR_REPEAT;
-
- setup_called_state(NODE_QUANT_BODY(qn), state);
- }
- break;
-
- case NODE_ANCHOR:
- {
- AnchorNode* an = ANCHOR_(node);
-
- switch (an->type) {
- case ANCR_PREC_READ_NOT:
- case ANCR_LOOK_BEHIND_NOT:
- state |= IN_NOT;
- /* fall */
- case ANCR_PREC_READ:
- case ANCR_LOOK_BEHIND:
- setup_called_state(NODE_ANCHOR_BODY(an), state);
- break;
- default:
- break;
- }
- }
- break;
-
- case NODE_BACKREF:
- case NODE_STRING:
- case NODE_CTYPE:
- case NODE_CCLASS:
- case NODE_GIMMICK:
- default:
- break;
- }
-}
-
-#endif /* USE_CALL */
-
-
-static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env);
-
-#ifdef __GNUC__
-__inline
-#endif
-static int
-setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
-{
-/* allowed node types in look-behind */
-#define ALLOWED_TYPE_IN_LB \
- ( NODE_BIT_LIST | NODE_BIT_ALT | NODE_BIT_STRING | NODE_BIT_CCLASS \
- | NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_BAG | NODE_BIT_QUANT \
- | NODE_BIT_CALL | NODE_BIT_GIMMICK)
-
-#define ALLOWED_BAG_IN_LB ( 1<<BAG_MEMORY | 1<<BAG_OPTION | 1<<BAG_IF_ELSE )
-#define ALLOWED_BAG_IN_LB_NOT ( 1<<BAG_OPTION | 1<<BAG_IF_ELSE )
-
-#define ALLOWED_ANCHOR_IN_LB \
- ( ANCR_LOOK_BEHIND | ANCR_BEGIN_LINE | ANCR_END_LINE | ANCR_BEGIN_BUF \
- | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY | ANCR_NO_WORD_BOUNDARY \
- | ANCR_WORD_BEGIN | ANCR_WORD_END \
- | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )
-
-#define ALLOWED_ANCHOR_IN_LB_NOT \
- ( ANCR_LOOK_BEHIND | ANCR_LOOK_BEHIND_NOT | ANCR_BEGIN_LINE \
- | ANCR_END_LINE | ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY \
- | ANCR_NO_WORD_BOUNDARY | ANCR_WORD_BEGIN | ANCR_WORD_END \
- | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )
-
- int r;
- AnchorNode* an = ANCHOR_(node);
-
- switch (an->type) {
- case ANCR_PREC_READ:
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
- break;
- case ANCR_PREC_READ_NOT:
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
- break;
-
- case ANCR_LOOK_BEHIND:
- {
- r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
- ALLOWED_BAG_IN_LB, ALLOWED_ANCHOR_IN_LB);
- if (r < 0) return r;
- if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_LOOK_BEHIND), env);
- if (r != 0) return r;
- r = setup_look_behind(node, reg, env);
- }
- break;
-
- case ANCR_LOOK_BEHIND_NOT:
- {
- r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
- ALLOWED_BAG_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
- if (r < 0) return r;
- if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_NOT|IN_LOOK_BEHIND),
- env);
- if (r != 0) return r;
- r = setup_look_behind(node, reg, env);
- }
- break;
-
- default:
- r = 0;
- break;
- }
-
- return r;
-}
-
-#ifdef __GNUC__
-__inline
-#endif
-static int
-setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
-{
- int r;
- OnigLen d;
- QuantNode* qn = QUANT_(node);
- Node* body = NODE_BODY(node);
-
- if ((state & IN_REAL_REPEAT) != 0) {
- NODE_STATUS_ADD(node, IN_REAL_REPEAT);
- }
- if ((state & IN_MULTI_ENTRY) != 0) {
- NODE_STATUS_ADD(node, IN_MULTI_ENTRY);
- }
-
- if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 1) {
- d = tree_min_len(body, env);
- if (d == 0) {
-#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
- qn->emptiness = quantifiers_memory_node_info(body);
- if (qn->emptiness == BODY_IS_EMPTY_POSSIBILITY_REC) {
- if (NODE_TYPE(body) == NODE_BAG &&
- BAG_(body)->type == BAG_MEMORY) {
- MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum);
- }
- }
-#else
- qn->emptiness = BODY_IS_EMPTY_POSSIBILITY;
-#endif
- }
- }
-
- if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
- state |= IN_REAL_REPEAT;
- if (qn->lower != qn->upper)
- state |= IN_VAR_REPEAT;
-
- r = setup_tree(body, reg, state, env);
- if (r != 0) return r;
-
- /* expand string */
-#define EXPAND_STRING_MAX_LENGTH 100
- if (NODE_TYPE(body) == NODE_STRING) {
- if (!IS_INFINITE_REPEAT(qn->lower) && qn->lower == qn->upper &&
- qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
- int len = NODE_STRING_LEN(body);
- StrNode* sn = STR_(body);
-
- if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
- int i, n = qn->lower;
- onig_node_conv_to_str_node(node, STR_(body)->flag);
- for (i = 0; i < n; i++) {
- r = onig_node_str_cat(node, sn->s, sn->end);
- if (r != 0) return r;
- }
- onig_node_free(body);
- return r;
- }
- }
- }
-
- if (qn->greedy && (qn->emptiness == BODY_IS_NOT_EMPTY)) {
- if (NODE_TYPE(body) == NODE_QUANT) {
- QuantNode* tqn = QUANT_(body);
- if (IS_NOT_NULL(tqn->head_exact)) {
- qn->head_exact = tqn->head_exact;
- tqn->head_exact = NULL;
- }
- }
- else {
- qn->head_exact = get_head_value_node(NODE_BODY(node), 1, reg);
- }
- }
-
- return r;
-}
-
-/* setup_tree does the following work.
- 1. check empty loop. (set qn->emptiness)
- 2. expand ignore-case in char class.
- 3. set memory status bit flags. (reg->mem_stats)
- 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
- 5. find invalid patterns in look-behind.
- 6. expand repeated string.
- */
-static int
-setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
-{
- int r = 0;
-
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- {
- Node* prev = NULL_NODE;
- do {
- r = setup_tree(NODE_CAR(node), reg, state, env);
- if (IS_NOT_NULL(prev) && r == 0) {
- r = next_setup(prev, NODE_CAR(node), reg);
- }
- prev = NODE_CAR(node);
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
- }
- break;
-
- case NODE_ALT:
- do {
- r = setup_tree(NODE_CAR(node), reg, (state | IN_ALT), env);
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
- break;
-
- case NODE_STRING:
- if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {
- r = expand_case_fold_string(node, reg, state);
- }
- break;
-
- case NODE_BACKREF:
- {
- int i;
- int* p;
- BackRefNode* br = BACKREF_(node);
- p = BACKREFS_P(br);
- for (i = 0; i < br->back_num; i++) {
- if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- MEM_STATUS_ON(env->backrefed_mem, p[i]);
- MEM_STATUS_ON(env->bt_mem_start, p[i]);
-#ifdef USE_BACKREF_WITH_LEVEL
- if (NODE_IS_NEST_LEVEL(node)) {
- MEM_STATUS_ON(env->bt_mem_end, p[i]);
- }
-#endif
- }
- }
- break;
-
- case NODE_BAG:
- {
- BagNode* en = BAG_(node);
-
- switch (en->type) {
- case BAG_OPTION:
- {
- OnigOptionType options = reg->options;
- reg->options = BAG_(node)->o.options;
- r = setup_tree(NODE_BODY(node), reg, state, env);
- reg->options = options;
- }
- break;
-
- case BAG_MEMORY:
-#ifdef USE_CALL
- state |= en->m.called_state;
-#endif
-
- if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0
- || NODE_IS_RECURSION(node)) {
- MEM_STATUS_ON(env->bt_mem_start, en->m.regnum);
- }
- r = setup_tree(NODE_BODY(node), reg, state, env);
- break;
-
- case BAG_STOP_BACKTRACK:
- {
- Node* target = NODE_BODY(node);
- r = setup_tree(target, reg, state, env);
- if (NODE_TYPE(target) == NODE_QUANT) {
- QuantNode* tqn = QUANT_(target);
- if (IS_INFINITE_REPEAT(tqn->upper) && tqn->lower <= 1 &&
- tqn->greedy != 0) { /* (?>a*), a*+ etc... */
- if (is_strict_real_node(NODE_BODY(target)))
- NODE_STATUS_ADD(node, STRICT_REAL_REPEAT);
- }
- }
- }
- break;
-
- case BAG_IF_ELSE:
- r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);
- if (r != 0) return r;
- if (IS_NOT_NULL(en->te.Then)) {
- r = setup_tree(en->te.Then, reg, (state | IN_ALT), env);
- if (r != 0) return r;
- }
- if (IS_NOT_NULL(en->te.Else))
- r = setup_tree(en->te.Else, reg, (state | IN_ALT), env);
- break;
- }
- }
- break;
-
- case NODE_QUANT:
- r = setup_quant(node, reg, state, env);
- break;
-
- case NODE_ANCHOR:
- r = setup_anchor(node, reg, state, env);
- break;
-
-#ifdef USE_CALL
- case NODE_CALL:
-#endif
- case NODE_CTYPE:
- case NODE_CCLASS:
- case NODE_GIMMICK:
- default:
- break;
- }
-
- return r;
-}
-
-static int
-set_sunday_quick_search_or_bmh_skip_table(regex_t* reg, int case_expand,
- UChar* s, UChar* end,
- UChar skip[], int* roffset)
-{
- int i, j, k, len, offset;
- int n, clen;
- UChar* p;
- OnigEncoding enc;
- OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
- UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
-
- enc = reg->enc;
- offset = ENC_GET_SKIP_OFFSET(enc);
- if (offset == ENC_SKIP_OFFSET_1_OR_0) {
- UChar* p = s;
- while (1) {
- len = enclen(enc, p);
- if (p + len >= end) {
- if (len == 1) offset = 1;
- else offset = 0;
- break;
- }
- p += len;
- }
- }
-
- len = (int )(end - s);
- if (len + offset >= 255)
- return ONIGERR_PARSER_BUG;
-
- *roffset = offset;
-
- for (i = 0; i < CHAR_MAP_SIZE; i++) {
- skip[i] = (UChar )(len + offset);
- }
-
- for (p = s; p < end; ) {
- int z;
-
- clen = enclen(enc, p);
- if (p + clen > end) clen = (int )(end - p);
-
- len = (int )(end - p);
- for (j = 0; j < clen; j++) {
- z = len - j + (offset - 1);
- if (z <= 0) break;
- skip[p[j]] = z;
- }
-
- if (case_expand != 0) {
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
- p, end, items);
- for (k = 0; k < n; k++) {
- ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
- for (j = 0; j < clen; j++) {
- z = len - j + (offset - 1);
- if (z <= 0) break;
- if (skip[buf[j]] > z)
- skip[buf[j]] = z;
- }
- }
- }
-
- p += clen;
- }
-
- return 0;
-}
-
-
-#define OPT_EXACT_MAXLEN 24
-
-#if OPT_EXACT_MAXLEN >= 255
-#error Too big OPT_EXACT_MAXLEN
-#endif
-
-typedef struct {
- OnigLen min; /* min byte length */
- OnigLen max; /* max byte length */
-} MinMax;
-
-typedef struct {
- MinMax mmd;
- OnigEncoding enc;
- OnigOptionType options;
- OnigCaseFoldType case_fold_flag;
- ScanEnv* scan_env;
-} OptEnv;
-
-typedef struct {
- int left;
- int right;
-} OptAnc;
-
-typedef struct {
- MinMax mmd; /* position */
- OptAnc anc;
- int reach_end;
- int case_fold;
- int good_case_fold;
- int len;
- UChar s[OPT_EXACT_MAXLEN];
-} OptStr;
-
-typedef struct {
- MinMax mmd; /* position */
- OptAnc anc;
- int value; /* weighted value */
- UChar map[CHAR_MAP_SIZE];
-} OptMap;
-
-typedef struct {
- MinMax len;
- OptAnc anc;
- OptStr sb; /* boundary */
- OptStr sm; /* middle */
- OptStr spr; /* prec read (?=...) */
- OptMap map; /* boundary */
-} OptNode;
-
-
-static int
-map_position_value(OnigEncoding enc, int i)
-{
- static const short int Vals[] = {
- 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
- 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
- 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
- };
-
- if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) {
- if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
- return 20;
- else
- return (int )Vals[i];
- }
- else
- return 4; /* Take it easy. */
-}
-
-static int
-distance_value(MinMax* mm)
-{
- /* 1000 / (min-max-dist + 1) */
- static const short int dist_vals[] = {
- 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
- 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
- 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
- 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
- 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
- 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
- 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
- 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
- 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
- };
-
- OnigLen d;
-
- if (mm->max == INFINITE_LEN) return 0;
-
- d = mm->max - mm->min;
- if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0])))
- /* return dist_vals[d] * 16 / (mm->min + 12); */
- return (int )dist_vals[d];
- else
- return 1;
-}
-
-static int
-comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2)
-{
- if (v2 <= 0) return -1;
- if (v1 <= 0) return 1;
-
- v1 *= distance_value(d1);
- v2 *= distance_value(d2);
-
- if (v2 > v1) return 1;
- if (v2 < v1) return -1;
-
- if (d2->min < d1->min) return 1;
- if (d2->min > d1->min) return -1;
- return 0;
-}
-
-static int
-is_equal_mml(MinMax* a, MinMax* b)
-{
- return a->min == b->min && a->max == b->max;
-}
-
-static void
-set_mml(MinMax* l, OnigLen min, OnigLen max)
-{
- l->min = min;
- l->max = max;
-}
-
-static void
-clear_mml(MinMax* l)
-{
- l->min = l->max = 0;
-}
-
-static void
-copy_mml(MinMax* to, MinMax* from)
-{
- to->min = from->min;
- to->max = from->max;
-}
-
-static void
-add_mml(MinMax* to, MinMax* from)
-{
- to->min = distance_add(to->min, from->min);
- to->max = distance_add(to->max, from->max);
-}
-
-static void
-alt_merge_mml(MinMax* to, MinMax* from)
-{
- if (to->min > from->min) to->min = from->min;
- if (to->max < from->max) to->max = from->max;
-}
-
-static void
-copy_opt_env(OptEnv* to, OptEnv* from)
-{
- *to = *from;
-}
-
-static void
-clear_opt_anc_info(OptAnc* a)
-{
- a->left = 0;
- a->right = 0;
-}
-
-static void
-copy_opt_anc_info(OptAnc* to, OptAnc* from)
-{
- *to = *from;
-}
-
-static void
-concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,
- OnigLen left_len, OnigLen right_len)
-{
- clear_opt_anc_info(to);
-
- to->left = left->left;
- if (left_len == 0) {
- to->left |= right->left;
- }
-
- to->right = right->right;
- if (right_len == 0) {
- to->right |= left->right;
- }
- else {
- to->right |= (left->right & ANCR_PREC_READ_NOT);
- }
-}
-
-static int
-is_left(int a)
-{
- if (a == ANCR_END_BUF || a == ANCR_SEMI_END_BUF ||
- a == ANCR_END_LINE || a == ANCR_PREC_READ || a == ANCR_PREC_READ_NOT)
- return 0;
-
- return 1;
-}
-
-static int
-is_set_opt_anc_info(OptAnc* to, int anc)
-{
- if ((to->left & anc) != 0) return 1;
-
- return ((to->right & anc) != 0 ? 1 : 0);
-}
-
-static void
-add_opt_anc_info(OptAnc* to, int anc)
-{
- if (is_left(anc))
- to->left |= anc;
- else
- to->right |= anc;
-}
-
-static void
-remove_opt_anc_info(OptAnc* to, int anc)
-{
- if (is_left(anc))
- to->left &= ~anc;
- else
- to->right &= ~anc;
-}
-
-static void
-alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)
-{
- to->left &= add->left;
- to->right &= add->right;
-}
-
-static int
-is_full_opt_exact(OptStr* e)
-{
- return e->len >= OPT_EXACT_MAXLEN;
-}
-
-static void
-clear_opt_exact(OptStr* e)
-{
- clear_mml(&e->mmd);
- clear_opt_anc_info(&e->anc);
- e->reach_end = 0;
- e->case_fold = 0;
- e->good_case_fold = 0;
- e->len = 0;
- e->s[0] = '\0';
-}
-
-static void
-copy_opt_exact(OptStr* to, OptStr* from)
-{
- *to = *from;
-}
-
-static int
-concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)
-{
- int i, j, len, r;
- UChar *p, *end;
- OptAnc tanc;
-
- if (add->case_fold != 0) {
- if (! to->case_fold) {
- if (to->len > 1 || to->len >= add->len) return 0; /* avoid */
-
- to->case_fold = 1;
- }
- else {
- if (to->good_case_fold != 0) {
- if (add->good_case_fold == 0) return 0;
- }
- }
- }
-
- r = 0;
- p = add->s;
- end = p + add->len;
- for (i = to->len; p < end; ) {
- len = enclen(enc, p);
- if (i + len > OPT_EXACT_MAXLEN) {
- r = 1; /* 1:full */
- break;
- }
- for (j = 0; j < len && p < end; j++)
- to->s[i++] = *p++;
- }
-
- to->len = i;
- to->reach_end = (p == end ? add->reach_end : 0);
-
- concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
- if (! to->reach_end) tanc.right = 0;
- copy_opt_anc_info(&to->anc, &tanc);
-
- return r;
-}
-
-static void
-concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc)
-{
- int i, j, len;
- UChar *p;
-
- for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
- len = enclen(enc, p);
- if (i + len > OPT_EXACT_MAXLEN) break;
- for (j = 0; j < len && p < end; j++)
- to->s[i++] = *p++;
- }
-
- to->len = i;
-
- if (p >= end && to->len == (int )(end - s))
- to->reach_end = 1;
-}
-
-static void
-alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env)
-{
- int i, j, len;
-
- if (add->len == 0 || to->len == 0) {
- clear_opt_exact(to);
- return ;
- }
-
- if (! is_equal_mml(&to->mmd, &add->mmd)) {
- clear_opt_exact(to);
- return ;
- }
-
- for (i = 0; i < to->len && i < add->len; ) {
- if (to->s[i] != add->s[i]) break;
- len = enclen(env->enc, to->s + i);
-
- for (j = 1; j < len; j++) {
- if (to->s[i+j] != add->s[i+j]) break;
- }
- if (j < len) break;
- i += len;
- }
-
- if (! add->reach_end || i < add->len || i < to->len) {
- to->reach_end = 0;
- }
- to->len = i;
- if (add->case_fold != 0)
- to->case_fold = 1;
- if (add->good_case_fold == 0)
- to->good_case_fold = 0;
-
- alt_merge_opt_anc_info(&to->anc, &add->anc);
- if (! to->reach_end) to->anc.right = 0;
-}
-
-static void
-select_opt_exact(OnigEncoding enc, OptStr* now, OptStr* alt)
-{
- int vn, va;
-
- vn = now->len;
- va = alt->len;
-
- if (va == 0) {
- return ;
- }
- else if (vn == 0) {
- copy_opt_exact(now, alt);
- return ;
- }
- else if (vn <= 2 && va <= 2) {
- /* ByteValTable[x] is big value --> low price */
- va = map_position_value(enc, now->s[0]);
- vn = map_position_value(enc, alt->s[0]);
-
- if (now->len > 1) vn += 5;
- if (alt->len > 1) va += 5;
- }
-
- if (now->case_fold == 0) vn *= 2;
- if (alt->case_fold == 0) va *= 2;
-
- if (now->good_case_fold != 0) vn *= 4;
- if (alt->good_case_fold != 0) va *= 4;
-
- if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
- copy_opt_exact(now, alt);
-}
-
-static void
-clear_opt_map(OptMap* map)
-{
- static const OptMap clean_info = {
- {0, 0}, {0, 0}, 0,
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- }
- };
-
- xmemcpy(map, &clean_info, sizeof(OptMap));
-}
-
-static void
-copy_opt_map(OptMap* to, OptMap* from)
-{
- xmemcpy(to,from,sizeof(OptMap));
-}
-
-static void
-add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc)
-{
- if (m->map[c] == 0) {
- m->map[c] = 1;
- m->value += map_position_value(enc, c);
- }
-}
-
-static int
-add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end,
- OnigEncoding enc, OnigCaseFoldType fold_flag)
-{
- OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
- UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
- int i, n;
-
- add_char_opt_map(map, p[0], enc);
-
- fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag);
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, fold_flag, p, end, items);
- if (n < 0) return n;
-
- for (i = 0; i < n; i++) {
- ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
- add_char_opt_map(map, buf[0], enc);
- }
-
- return 0;
-}
-
-static void
-select_opt_map(OptMap* now, OptMap* alt)
-{
- static int z = 1<<15; /* 32768: something big value */
-
- int vn, va;
-
- if (alt->value == 0) return ;
- if (now->value == 0) {
- copy_opt_map(now, alt);
- return ;
- }
-
- vn = z / now->value;
- va = z / alt->value;
- if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
- copy_opt_map(now, alt);
-}
-
-static int
-comp_opt_exact_or_map(OptStr* e, OptMap* m)
-{
-#define COMP_EM_BASE 20
- int ae, am;
- int case_value;
-
- if (m->value <= 0) return -1;
-
- if (e->case_fold != 0) {
- if (e->good_case_fold != 0)
- case_value = 2;
- else
- case_value = 1;
- }
- else
- case_value = 3;
-
- ae = COMP_EM_BASE * e->len * case_value;
- am = COMP_EM_BASE * 5 * 2 / m->value;
- return comp_distance_value(&e->mmd, &m->mmd, ae, am);
-}
-
-static void
-alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)
-{
- int i, val;
-
- /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
- if (to->value == 0) return ;
- if (add->value == 0 || to->mmd.max < add->mmd.min) {
- clear_opt_map(to);
- return ;
- }
-
- alt_merge_mml(&to->mmd, &add->mmd);
-
- val = 0;
- for (i = 0; i < CHAR_MAP_SIZE; i++) {
- if (add->map[i])
- to->map[i] = 1;
-
- if (to->map[i])
- val += map_position_value(enc, i);
- }
- to->value = val;
-
- alt_merge_opt_anc_info(&to->anc, &add->anc);
-}
-
-static void
-set_bound_node_opt_info(OptNode* opt, MinMax* plen)
-{
- copy_mml(&(opt->sb.mmd), plen);
- copy_mml(&(opt->spr.mmd), plen);
- copy_mml(&(opt->map.mmd), plen);
-}
-
-static void
-clear_node_opt_info(OptNode* opt)
-{
- clear_mml(&opt->len);
- clear_opt_anc_info(&opt->anc);
- clear_opt_exact(&opt->sb);
- clear_opt_exact(&opt->sm);
- clear_opt_exact(&opt->spr);
- clear_opt_map(&opt->map);
-}
-
-static void
-copy_node_opt_info(OptNode* to, OptNode* from)
-{
- xmemcpy(to,from,sizeof(OptNode));
-}
-
-static void
-concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add)
-{
- int sb_reach, sm_reach;
- OptAnc tanc;
-
- concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
- copy_opt_anc_info(&to->anc, &tanc);
-
- if (add->sb.len > 0 && to->len.max == 0) {
- concat_opt_anc_info(&tanc, &to->anc, &add->sb.anc, to->len.max, add->len.max);
- copy_opt_anc_info(&add->sb.anc, &tanc);
- }
-
- if (add->map.value > 0 && to->len.max == 0) {
- if (add->map.mmd.max == 0)
- add->map.anc.left |= to->anc.left;
- }
-
- sb_reach = to->sb.reach_end;
- sm_reach = to->sm.reach_end;
-
- if (add->len.max != 0)
- to->sb.reach_end = to->sm.reach_end = 0;
-
- if (add->sb.len > 0) {
- if (sb_reach) {
- concat_opt_exact(&to->sb, &add->sb, enc);
- clear_opt_exact(&add->sb);
- }
- else if (sm_reach) {
- concat_opt_exact(&to->sm, &add->sb, enc);
- clear_opt_exact(&add->sb);
- }
- }
- select_opt_exact(enc, &to->sm, &add->sb);
- select_opt_exact(enc, &to->sm, &add->sm);
-
- if (to->spr.len > 0) {
- if (add->len.max > 0) {
- if (to->spr.len > (int )add->len.max)
- to->spr.len = add->len.max;
-
- if (to->spr.mmd.max == 0)
- select_opt_exact(enc, &to->sb, &to->spr);
- else
- select_opt_exact(enc, &to->sm, &to->spr);
- }
- }
- else if (add->spr.len > 0) {
- copy_opt_exact(&to->spr, &add->spr);
- }
-
- select_opt_map(&to->map, &add->map);
- add_mml(&to->len, &add->len);
-}
-
-static void
-alt_merge_node_opt_info(OptNode* to, OptNode* add, OptEnv* env)
-{
- alt_merge_opt_anc_info(&to->anc, &add->anc);
- alt_merge_opt_exact(&to->sb, &add->sb, env);
- alt_merge_opt_exact(&to->sm, &add->sm, env);
- alt_merge_opt_exact(&to->spr, &add->spr, env);
- alt_merge_opt_map(env->enc, &to->map, &add->map);
-
- alt_merge_mml(&to->len, &add->len);
-}
-
-
-#define MAX_NODE_OPT_INFO_REF_COUNT 5
-
-static int
-optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
-{
- int i;
- int r;
- OptNode xo;
- OnigEncoding enc;
-
- r = 0;
- enc = env->enc;
- clear_node_opt_info(opt);
- set_bound_node_opt_info(opt, &env->mmd);
-
- switch (NODE_TYPE(node)) {
- case NODE_LIST:
- {
- OptEnv nenv;
- Node* nd = node;
-
- copy_opt_env(&nenv, env);
- do {
- r = optimize_nodes(NODE_CAR(nd), &xo, &nenv);
- if (r == 0) {
- add_mml(&nenv.mmd, &xo.len);
- concat_left_node_opt_info(enc, opt, &xo);
- }
- } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd)));
- }
- break;
-
- case NODE_ALT:
- {
- Node* nd = node;
-
- do {
- r = optimize_nodes(NODE_CAR(nd), &xo, env);
- if (r == 0) {
- if (nd == node) copy_node_opt_info(opt, &xo);
- else alt_merge_node_opt_info(opt, &xo, env);
- }
- } while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd)));
- }
- break;
-
- case NODE_STRING:
- {
- StrNode* sn = STR_(node);
- int slen = (int )(sn->end - sn->s);
- /* int is_raw = NODE_STRING_IS_RAW(node); */
-
- if (! NODE_STRING_IS_AMBIG(node)) {
- concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);
- if (slen > 0) {
- add_char_opt_map(&opt->map, *(sn->s), enc);
- }
- set_mml(&opt->len, slen, slen);
- }
- else {
- int max;
-
- if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) {
- int n = onigenc_strlen(enc, sn->s, sn->end);
- max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;
- }
- else {
- concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);
- opt->sb.case_fold = 1;
- if (NODE_STRING_IS_GOOD_AMBIG(node))
- opt->sb.good_case_fold = 1;
-
- if (slen > 0) {
- r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,
- enc, env->case_fold_flag);
- if (r != 0) break;
- }
-
- max = slen;
- }
-
- set_mml(&opt->len, slen, max);
- }
- }
- break;
-
- case NODE_CCLASS:
- {
- int z;
- CClassNode* cc = CCLASS_(node);
-
- /* no need to check ignore case. (set in setup_tree()) */
-
- if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
- OnigLen min = ONIGENC_MBC_MINLEN(enc);
- OnigLen max = ONIGENC_MBC_MAXLEN_DIST(enc);
-
- set_mml(&opt->len, min, max);
- }
- else {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- z = BITSET_AT(cc->bs, i);
- if ((z && ! IS_NCCLASS_NOT(cc)) || (! z && IS_NCCLASS_NOT(cc))) {
- add_char_opt_map(&opt->map, (UChar )i, enc);
- }
- }
- set_mml(&opt->len, 1, 1);
- }
- }
- break;
-
- case NODE_CTYPE:
- {
- int min, max;
- int range;
-
- max = ONIGENC_MBC_MAXLEN_DIST(enc);
-
- if (max == 1) {
- min = 1;
-
- switch (CTYPE_(node)->ctype) {
- case CTYPE_ANYCHAR:
- break;
-
- case ONIGENC_CTYPE_WORD:
- range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
- if (CTYPE_(node)->not != 0) {
- for (i = 0; i < range; i++) {
- if (! ONIGENC_IS_CODE_WORD(enc, i)) {
- add_char_opt_map(&opt->map, (UChar )i, enc);
- }
- }
- for (i = range; i < SINGLE_BYTE_SIZE; i++) {
- add_char_opt_map(&opt->map, (UChar )i, enc);
- }
- }
- else {
- for (i = 0; i < range; i++) {
- if (ONIGENC_IS_CODE_WORD(enc, i)) {
- add_char_opt_map(&opt->map, (UChar )i, enc);
- }
- }
- }
- break;
- }
- }
- else {
- min = ONIGENC_MBC_MINLEN(enc);
- }
- set_mml(&opt->len, min, max);
- }
- break;
-
- case NODE_ANCHOR:
- switch (ANCHOR_(node)->type) {
- case ANCR_BEGIN_BUF:
- case ANCR_BEGIN_POSITION:
- case ANCR_BEGIN_LINE:
- case ANCR_END_BUF:
- case ANCR_SEMI_END_BUF:
- case ANCR_END_LINE:
- case ANCR_PREC_READ_NOT:
- case ANCR_LOOK_BEHIND:
- add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);
- break;
-
- case ANCR_PREC_READ:
- {
- r = optimize_nodes(NODE_BODY(node), &xo, env);
- if (r == 0) {
- if (xo.sb.len > 0)
- copy_opt_exact(&opt->spr, &xo.sb);
- else if (xo.sm.len > 0)
- copy_opt_exact(&opt->spr, &xo.sm);
-
- opt->spr.reach_end = 0;
-
- if (xo.map.value > 0)
- copy_opt_map(&opt->map, &xo.map);
- }
- }
- break;
-
- case ANCR_LOOK_BEHIND_NOT:
- break;
- }
- break;
-
- case NODE_BACKREF:
- if (! NODE_IS_CHECKER(node)) {
- int* backs;
- OnigLen min, max, tmin, tmax;
- MemEnv* mem_env = SCANENV_MEMENV(env->scan_env);
- BackRefNode* br = BACKREF_(node);
-
- if (NODE_IS_RECURSION(node)) {
- set_mml(&opt->len, 0, INFINITE_LEN);
- break;
- }
- backs = BACKREFS_P(br);
- min = tree_min_len(mem_env[backs[0]].node, env->scan_env);
- max = tree_max_len(mem_env[backs[0]].node, env->scan_env);
- for (i = 1; i < br->back_num; i++) {
- tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env);
- tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env);
- if (min > tmin) min = tmin;
- if (max < tmax) max = tmax;
- }
- set_mml(&opt->len, min, max);
- }
- break;
-
-#ifdef USE_CALL
- case NODE_CALL:
- if (NODE_IS_RECURSION(node))
- set_mml(&opt->len, 0, INFINITE_LEN);
- else {
- OnigOptionType save = env->options;
- env->options = BAG_(NODE_BODY(node))->o.options;
- r = optimize_nodes(NODE_BODY(node), opt, env);
- env->options = save;
- }
- break;
-#endif
-
- case NODE_QUANT:
- {
- OnigLen min, max;
- QuantNode* qn = QUANT_(node);
-
- r = optimize_nodes(NODE_BODY(node), &xo, env);
- if (r != 0) break;
-
- if (qn->lower > 0) {
- copy_node_opt_info(opt, &xo);
- if (xo.sb.len > 0) {
- if (xo.sb.reach_end) {
- for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->sb); i++) {
- int rc = concat_opt_exact(&opt->sb, &xo.sb, enc);
- if (rc > 0) break;
- }
- if (i < qn->lower) opt->sb.reach_end = 0;
- }
- }
-
- if (qn->lower != qn->upper) {
- opt->sb.reach_end = 0;
- opt->sm.reach_end = 0;
- }
- if (qn->lower > 1)
- opt->sm.reach_end = 0;
- }
-
- if (IS_INFINITE_REPEAT(qn->upper)) {
- if (env->mmd.max == 0 &&
- NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {
- if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))
- add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_ML);
- else
- add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF);
- }
-
- max = (xo.len.max > 0 ? INFINITE_LEN : 0);
- }
- else {
- max = distance_multiply(xo.len.max, qn->upper);
- }
-
- min = distance_multiply(xo.len.min, qn->lower);
- set_mml(&opt->len, min, max);
- }
- break;
-
- case NODE_BAG:
- {
- BagNode* en = BAG_(node);
-
- switch (en->type) {
- case BAG_OPTION:
- {
- OnigOptionType save = env->options;
-
- env->options = en->o.options;
- r = optimize_nodes(NODE_BODY(node), opt, env);
- env->options = save;
- }
- break;
-
- case BAG_MEMORY:
-#ifdef USE_CALL
- en->opt_count++;
- if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
- OnigLen min, max;
-
- min = 0;
- max = INFINITE_LEN;
- if (NODE_IS_MIN_FIXED(node)) min = en->min_len;
- if (NODE_IS_MAX_FIXED(node)) max = en->max_len;
- set_mml(&opt->len, min, max);
- }
- else
-#endif
- {
- r = optimize_nodes(NODE_BODY(node), opt, env);
- if (is_set_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK)) {
- if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))
- remove_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK);
- }
- }
- break;
-
- case BAG_STOP_BACKTRACK:
- r = optimize_nodes(NODE_BODY(node), opt, env);
- break;
-
- case BAG_IF_ELSE:
- {
- OptEnv nenv;
-
- copy_opt_env(&nenv, env);
- r = optimize_nodes(NODE_BAG_BODY(en), &xo, &nenv);
- if (r == 0) {
- add_mml(&nenv.mmd, &xo.len);
- concat_left_node_opt_info(enc, opt, &xo);
- if (IS_NOT_NULL(en->te.Then)) {
- r = optimize_nodes(en->te.Then, &xo, &nenv);
- if (r == 0) {
- concat_left_node_opt_info(enc, opt, &xo);
- }
- }
-
- if (IS_NOT_NULL(en->te.Else)) {
- r = optimize_nodes(en->te.Else, &xo, env);
- if (r == 0)
- alt_merge_node_opt_info(opt, &xo, env);
- }
- }
- }
- break;
- }
- }
- break;
-
- case NODE_GIMMICK:
- break;
-
- default:
-#ifdef ONIG_DEBUG
- fprintf(stderr, "optimize_nodes: undefined node type %d\n", NODE_TYPE(node));
-#endif
- r = ONIGERR_TYPE_BUG;
- break;
- }
-
- return r;
-}
-
-static int
-set_optimize_exact(regex_t* reg, OptStr* e)
-{
- int r;
-
- if (e->len == 0) return 0;
-
- reg->exact = (UChar* )xmalloc(e->len);
- CHECK_NULL_RETURN_MEMERR(reg->exact);
- xmemcpy(reg->exact, e->s, e->len);
- reg->exact_end = reg->exact + e->len;
-
- if (e->case_fold) {
- reg->optimize = OPTIMIZE_STR_CASE_FOLD;
- if (e->good_case_fold != 0) {
- if (e->len >= 2) {
- r = set_sunday_quick_search_or_bmh_skip_table(reg, 1,
- reg->exact, reg->exact_end,
- reg->map, &(reg->map_offset));
- if (r != 0) return r;
- reg->optimize = OPTIMIZE_STR_CASE_FOLD_FAST;
- }
- }
- }
- else {
- int allow_reverse;
-
- allow_reverse =
- ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
-
- if (e->len >= 2 || (e->len >= 1 && allow_reverse)) {
- r = set_sunday_quick_search_or_bmh_skip_table(reg, 0,
- reg->exact, reg->exact_end,
- reg->map, &(reg->map_offset));
- if (r != 0) return r;
-
- reg->optimize = (allow_reverse != 0
- ? OPTIMIZE_STR_FAST
- : OPTIMIZE_STR_FAST_STEP_FORWARD);
- }
- else {
- reg->optimize = OPTIMIZE_STR;
- }
- }
-
- reg->dmin = e->mmd.min;
- reg->dmax = e->mmd.max;
-
- if (reg->dmin != INFINITE_LEN) {
- reg->threshold_len = reg->dmin + (int )(reg->exact_end - reg->exact);
- }
-
- return 0;
-}
-
-static void
-set_optimize_map(regex_t* reg, OptMap* m)
-{
- int i;
-
- for (i = 0; i < CHAR_MAP_SIZE; i++)
- reg->map[i] = m->map[i];
-
- reg->optimize = OPTIMIZE_MAP;
- reg->dmin = m->mmd.min;
- reg->dmax = m->mmd.max;
-
- if (reg->dmin != INFINITE_LEN) {
- reg->threshold_len = reg->dmin + 1;
- }
-}
-
-static void
-set_sub_anchor(regex_t* reg, OptAnc* anc)
-{
- reg->sub_anchor |= anc->left & ANCR_BEGIN_LINE;
- reg->sub_anchor |= anc->right & ANCR_END_LINE;
-}
-
-#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
-static void print_optimize_info(FILE* f, regex_t* reg);
-#endif
-
-static int
-set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
-{
- int r;
- OptNode opt;
- OptEnv env;
-
- env.enc = reg->enc;
- env.options = reg->options;
- env.case_fold_flag = reg->case_fold_flag;
- env.scan_env = scan_env;
- clear_mml(&env.mmd);
-
- r = optimize_nodes(node, &opt, &env);
- if (r != 0) return r;
-
- reg->anchor = opt.anc.left & (ANCR_BEGIN_BUF |
- ANCR_BEGIN_POSITION | ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML |
- ANCR_LOOK_BEHIND);
-
- if ((opt.anc.left & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) != 0)
- reg->anchor &= ~ANCR_ANYCHAR_INF_ML;
-
- reg->anchor |= opt.anc.right & (ANCR_END_BUF | ANCR_SEMI_END_BUF |
- ANCR_PREC_READ_NOT);
-
- if (reg->anchor & (ANCR_END_BUF | ANCR_SEMI_END_BUF)) {
- reg->anchor_dmin = opt.len.min;
- reg->anchor_dmax = opt.len.max;
- }
-
- if (opt.sb.len > 0 || opt.sm.len > 0) {
- select_opt_exact(reg->enc, &opt.sb, &opt.sm);
- if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.sb, &opt.map) > 0) {
- goto set_map;
- }
- else {
- r = set_optimize_exact(reg, &opt.sb);
- set_sub_anchor(reg, &opt.sb.anc);
- }
- }
- else if (opt.map.value > 0) {
- set_map:
- set_optimize_map(reg, &opt.map);
- set_sub_anchor(reg, &opt.map.anc);
- }
- else {
- reg->sub_anchor |= opt.anc.left & ANCR_BEGIN_LINE;
- if (opt.len.max == 0)
- reg->sub_anchor |= opt.anc.right & ANCR_END_LINE;
- }
-
-#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
- print_optimize_info(stderr, reg);
-#endif
- return r;
-}
-
-static void
-clear_optimize_info(regex_t* reg)
-{
- reg->optimize = OPTIMIZE_NONE;
- reg->anchor = 0;
- reg->anchor_dmin = 0;
- reg->anchor_dmax = 0;
- reg->sub_anchor = 0;
- reg->exact_end = (UChar* )NULL;
- reg->map_offset = 0;
- reg->threshold_len = 0;
- if (IS_NOT_NULL(reg->exact)) {
- xfree(reg->exact);
- reg->exact = (UChar* )NULL;
- }
-}
-
-#ifdef ONIG_DEBUG
-
-static void print_enc_string(FILE* fp, OnigEncoding enc,
- const UChar *s, const UChar *end)
-{
- fprintf(fp, "\nPATTERN: /");
-
- if (ONIGENC_MBC_MINLEN(enc) > 1) {
- const UChar *p;
- OnigCodePoint code;
-
- p = s;
- while (p < end) {
- code = ONIGENC_MBC_TO_CODE(enc, p, end);
- if (code >= 0x80) {
- fprintf(fp, " 0x%04x ", (int )code);
- }
- else {
- fputc((int )code, fp);
- }
-
- p += enclen(enc, p);
- }
- }
- else {
- while (s < end) {
- fputc((int )*s, fp);
- s++;
- }
- }
-
- fprintf(fp, "/\n");
-}
-
-#endif /* ONIG_DEBUG */
-
-#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
-
-static void
-print_distance_range(FILE* f, OnigLen a, OnigLen b)
-{
- if (a == INFINITE_LEN)
- fputs("inf", f);
- else
- fprintf(f, "(%u)", a);
-
- fputs("-", f);
-
- if (b == INFINITE_LEN)
- fputs("inf", f);
- else
- fprintf(f, "(%u)", b);
-}
-
-static void
-print_anchor(FILE* f, int anchor)
-{
- int q = 0;
-
- fprintf(f, "[");
-
- if (anchor & ANCR_BEGIN_BUF) {
- fprintf(f, "begin-buf");
- q = 1;
- }
- if (anchor & ANCR_BEGIN_LINE) {
- if (q) fprintf(f, ", ");
- q = 1;
- fprintf(f, "begin-line");
- }
- if (anchor & ANCR_BEGIN_POSITION) {
- if (q) fprintf(f, ", ");
- q = 1;
- fprintf(f, "begin-pos");
- }
- if (anchor & ANCR_END_BUF) {
- if (q) fprintf(f, ", ");
- q = 1;
- fprintf(f, "end-buf");
- }
- if (anchor & ANCR_SEMI_END_BUF) {
- if (q) fprintf(f, ", ");
- q = 1;
- fprintf(f, "semi-end-buf");
- }
- if (anchor & ANCR_END_LINE) {
- if (q) fprintf(f, ", ");
- q = 1;
- fprintf(f, "end-line");
- }
- if (anchor & ANCR_ANYCHAR_INF) {
- if (q) fprintf(f, ", ");
- q = 1;
- fprintf(f, "anychar-inf");
- }
- if (anchor & ANCR_ANYCHAR_INF_ML) {
- if (q) fprintf(f, ", ");
- fprintf(f, "anychar-inf-ml");
- }
-
- fprintf(f, "]");
-}
-
-static void
-print_optimize_info(FILE* f, regex_t* reg)
-{
- static const char* on[] = { "NONE", "STR",
- "STR_FAST", "STR_FAST_STEP_FORWARD",
- "STR_CASE_FOLD_FAST", "STR_CASE_FOLD", "MAP" };
-
- fprintf(f, "optimize: %s\n", on[reg->optimize]);
- fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
- if ((reg->anchor & ANCR_END_BUF_MASK) != 0)
- print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
- fprintf(f, "\n");
-
- if (reg->optimize) {
- fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
- fprintf(f, "\n");
- }
- fprintf(f, "\n");
-
- if (reg->exact) {
- UChar *p;
- fprintf(f, "exact: [");
- for (p = reg->exact; p < reg->exact_end; p++) {
- fputc(*p, f);
- }
- fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
- }
- else if (reg->optimize & OPTIMIZE_MAP) {
- int c, i, n = 0;
-
- for (i = 0; i < CHAR_MAP_SIZE; i++)
- if (reg->map[i]) n++;
-
- fprintf(f, "map: n=%d\n", n);
- if (n > 0) {
- c = 0;
- fputc('[', f);
- for (i = 0; i < CHAR_MAP_SIZE; i++) {
- if (reg->map[i] != 0) {
- if (c > 0) fputs(", ", f);
- c++;
- if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
- ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
- fputc(i, f);
- else
- fprintf(f, "%d", i);
- }
- }
- fprintf(f, "]\n");
- }
- }
-}
-#endif
-
-
-extern RegexExt*
-onig_get_regex_ext(regex_t* reg)
-{
- if (IS_NULL(reg->extp)) {
- RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));
- if (IS_NULL(ext)) return 0;
-
- ext->pattern = 0;
- ext->pattern_end = 0;
-#ifdef USE_CALLOUT
- ext->tag_table = 0;
- ext->callout_num = 0;
- ext->callout_list_alloc = 0;
- ext->callout_list = 0;
-#endif
-
- reg->extp = ext;
- }
-
- return reg->extp;
-}
-
-static void
-free_regex_ext(RegexExt* ext)
-{
- if (IS_NOT_NULL(ext)) {
- if (IS_NOT_NULL(ext->pattern))
- xfree((void* )ext->pattern);
-
-#ifdef USE_CALLOUT
- if (IS_NOT_NULL(ext->tag_table))
- onig_callout_tag_table_free(ext->tag_table);
-
- if (IS_NOT_NULL(ext->callout_list))
- onig_free_reg_callout_list(ext->callout_num, ext->callout_list);
-#endif
-
- xfree(ext);
- }
-}
-
-extern int
-onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end)
-{
- RegexExt* ext;
- UChar* s;
-
- ext = onig_get_regex_ext(reg);
- CHECK_NULL_RETURN_MEMERR(ext);
-
- s = onigenc_strdup(reg->enc, pattern, pattern_end);
- CHECK_NULL_RETURN_MEMERR(s);
-
- ext->pattern = s;
- ext->pattern_end = s + (pattern_end - pattern);
-
- return ONIG_NORMAL;
-}
-
-extern void
-onig_free_body(regex_t* reg)
-{
- if (IS_NOT_NULL(reg)) {
- ops_free(reg);
- if (IS_NOT_NULL(reg->string_pool)) {
- xfree(reg->string_pool);
- reg->string_pool_end = reg->string_pool = 0;
- }
- if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
- if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
- if (IS_NOT_NULL(reg->extp)) {
- free_regex_ext(reg->extp);
- reg->extp = 0;
- }
-
- onig_names_free(reg);
- }
-}
-
-extern void
-onig_free(regex_t* reg)
-{
- if (IS_NOT_NULL(reg)) {
- onig_free_body(reg);
- xfree(reg);
- }
-}
-
-
-#ifdef ONIG_DEBUG_PARSE
-static void print_tree P_((FILE* f, Node* node));
-#endif
-
-extern int onig_init_for_match_at(regex_t* reg);
-
-extern int
-onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
- OnigErrorInfo* einfo)
-{
- int r;
- Node* root;
- ScanEnv scan_env;
-#ifdef USE_CALL
- UnsetAddrList uslist;
-#endif
-
- root = 0;
- if (IS_NOT_NULL(einfo)) {
- einfo->enc = reg->enc;
- einfo->par = (UChar* )NULL;
- }
-
-#ifdef ONIG_DEBUG
- print_enc_string(stderr, reg->enc, pattern, pattern_end);
-#endif
-
- if (reg->ops_alloc == 0) {
- r = ops_init(reg, OPS_INIT_SIZE);
- if (r != 0) goto end;
- }
- else
- reg->ops_used = 0;
-
- reg->string_pool = 0;
- reg->string_pool_end = 0;
- reg->num_mem = 0;
- reg->num_repeat = 0;
- reg->num_null_check = 0;
- reg->repeat_range_alloc = 0;
- reg->repeat_range = (OnigRepeatRange* )NULL;
-
- r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);
- if (r != 0) goto err;
-
- /* mixed use named group and no-named group */
- if (scan_env.num_named > 0 &&
- IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
- ! ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
- if (scan_env.num_named != scan_env.num_mem)
- r = disable_noname_group_capture(&root, reg, &scan_env);
- else
- r = numbered_ref_check(root);
-
- if (r != 0) goto err;
- }
-
- r = check_backrefs(root, &scan_env);
- if (r != 0) goto err;
-
-#ifdef USE_CALL
- if (scan_env.num_call > 0) {
- r = unset_addr_list_init(&uslist, scan_env.num_call);
- if (r != 0) goto err;
- scan_env.unset_addr_list = &uslist;
- r = setup_call(root, &scan_env, 0);
- if (r != 0) goto err_unset;
- r = setup_call2(root);
- if (r != 0) goto err_unset;
- r = recursive_call_check_trav(root, &scan_env, 0);
- if (r < 0) goto err_unset;
- r = infinite_recursive_call_check_trav(root, &scan_env);
- if (r != 0) goto err_unset;
-
- setup_called_state(root, 0);
- }
-
- reg->num_call = scan_env.num_call;
-#endif
-
- r = setup_tree(root, reg, 0, &scan_env);
- if (r != 0) goto err_unset;
-
-#ifdef ONIG_DEBUG_PARSE
- print_tree(stderr, root);
-#endif
-
- reg->capture_history = scan_env.capture_history;
- reg->bt_mem_start = scan_env.bt_mem_start;
- reg->bt_mem_start |= reg->capture_history;
- if (IS_FIND_CONDITION(reg->options))
- MEM_STATUS_ON_ALL(reg->bt_mem_end);
- else {
- reg->bt_mem_end = scan_env.bt_mem_end;
- reg->bt_mem_end |= reg->capture_history;
- }
- reg->bt_mem_start |= reg->bt_mem_end;
-
- clear_optimize_info(reg);
-#ifndef ONIG_DONT_OPTIMIZE
- r = set_optimize_info_from_tree(root, reg, &scan_env);
- if (r != 0) goto err_unset;
-#endif
-
- if (IS_NOT_NULL(scan_env.mem_env_dynamic)) {
- xfree(scan_env.mem_env_dynamic);
- scan_env.mem_env_dynamic = (MemEnv* )NULL;
- }
-
- r = compile_tree(root, reg, &scan_env);
- if (r == 0) {
- if (scan_env.keep_num > 0) {
- r = add_op(reg, OP_UPDATE_VAR);
- if (r != 0) goto err;
-
- COP(reg)->update_var.type = UPDATE_VAR_KEEP_FROM_STACK_LAST;
- COP(reg)->update_var.id = 0; /* not used */
- }
-
- r = add_op(reg, OP_END);
- if (r != 0) goto err;
-
-#ifdef USE_CALL
- if (scan_env.num_call > 0) {
- r = fix_unset_addr_list(&uslist, reg);
- unset_addr_list_end(&uslist);
- if (r != 0) goto err;
- }
-#endif
-
- if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)
-#ifdef USE_CALLOUT
- || (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0)
-#endif
- )
- reg->stack_pop_level = STACK_POP_LEVEL_ALL;
- else {
- if (reg->bt_mem_start != 0)
- reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
- else
- reg->stack_pop_level = STACK_POP_LEVEL_FREE;
- }
-
- r = ops_make_string_pool(reg);
- if (r != 0) goto err;
- }
-#ifdef USE_CALL
- else if (scan_env.num_call > 0) {
- unset_addr_list_end(&uslist);
- }
-#endif
- onig_node_free(root);
-
-#ifdef ONIG_DEBUG_COMPILE
- onig_print_names(stderr, reg);
- onig_print_compiled_byte_code_list(stderr, reg);
-#endif
-
-#ifdef USE_DIRECT_THREADED_CODE
- /* opcode -> opaddr */
- onig_init_for_match_at(reg);
-#endif
-
- end:
- return r;
-
- err_unset:
-#ifdef USE_CALL
- if (scan_env.num_call > 0) {
- unset_addr_list_end(&uslist);
- }
-#endif
- err:
- if (IS_NOT_NULL(scan_env.error)) {
- if (IS_NOT_NULL(einfo)) {
- einfo->par = scan_env.error;
- einfo->par_end = scan_env.error_end;
- }
- }
-
- onig_node_free(root);
- if (IS_NOT_NULL(scan_env.mem_env_dynamic))
- xfree(scan_env.mem_env_dynamic);
- return r;
-}
-
-
-static int onig_inited = 0;
-
-extern int
-onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag,
- OnigEncoding enc, OnigSyntaxType* syntax)
-{
- int r;
-
- xmemset(reg, 0, sizeof(*reg));
-
- if (onig_inited == 0) {
-#if 0
- return ONIGERR_LIBRARY_IS_NOT_INITIALIZED;
-#else
- r = onig_initialize(&enc, 1);
- if (r != 0)
- return ONIGERR_FAIL_TO_INITIALIZE;
-
- onig_warning("You didn't call onig_initialize() explicitly");
-#endif
- }
-
- if (IS_NULL(reg))
- return ONIGERR_INVALID_ARGUMENT;
-
- if (ONIGENC_IS_UNDEF(enc))
- return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
-
- if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
- == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
- return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
- }
-
- if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
- option |= syntax->options;
- option &= ~ONIG_OPTION_SINGLELINE;
- }
- else
- option |= syntax->options;
-
- (reg)->enc = enc;
- (reg)->options = option;
- (reg)->syntax = syntax;
- (reg)->optimize = 0;
- (reg)->exact = (UChar* )NULL;
- (reg)->extp = (RegexExt* )NULL;
-
- (reg)->ops = (Operation* )NULL;
- (reg)->ops_curr = (Operation* )NULL;
- (reg)->ops_used = 0;
- (reg)->ops_alloc = 0;
- (reg)->name_table = (void* )NULL;
-
- (reg)->case_fold_flag = case_fold_flag;
- return 0;
-}
-
-extern int
-onig_new_without_alloc(regex_t* reg,
- const UChar* pattern, const UChar* pattern_end,
- OnigOptionType option, OnigEncoding enc,
- OnigSyntaxType* syntax, OnigErrorInfo* einfo)
-{
- int r;
-
- r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
- if (r != 0) return r;
-
- r = onig_compile(reg, pattern, pattern_end, einfo);
- return r;
-}
-
-extern int
-onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
- OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
- OnigErrorInfo* einfo)
-{
- int r;
-
- *reg = (regex_t* )xmalloc(sizeof(regex_t));
- if (IS_NULL(*reg)) return ONIGERR_MEMORY;
-
- r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
- if (r != 0) goto err;
-
- r = onig_compile(*reg, pattern, pattern_end, einfo);
- if (r != 0) {
- err:
- onig_free(*reg);
- *reg = NULL;
- }
- return r;
-}
-
-extern int
-onig_initialize(OnigEncoding encodings[], int n)
-{
- int i;
- int r;
-
- if (onig_inited != 0)
- return 0;
-
- onigenc_init();
-
- onig_inited = 1;
-
- for (i = 0; i < n; i++) {
- OnigEncoding enc = encodings[i];
- r = onig_initialize_encoding(enc);
- if (r != 0)
- return r;
- }
-
- return ONIG_NORMAL;
-}
-
-typedef struct EndCallListItem {
- struct EndCallListItem* next;
- void (*func)(void);
-} EndCallListItemType;
-
-static EndCallListItemType* EndCallTop;
-
-extern void onig_add_end_call(void (*func)(void))
-{
- EndCallListItemType* item;
-
- item = (EndCallListItemType* )xmalloc(sizeof(*item));
- if (item == 0) return ;
-
- item->next = EndCallTop;
- item->func = func;
-
- EndCallTop = item;
-}
-
-static void
-exec_end_call_list(void)
-{
- EndCallListItemType* prev;
- void (*func)(void);
-
- while (EndCallTop != 0) {
- func = EndCallTop->func;
- (*func)();
-
- prev = EndCallTop;
- EndCallTop = EndCallTop->next;
- xfree(prev);
- }
-}
-
-extern int
-onig_end(void)
-{
- exec_end_call_list();
-
-#ifdef USE_CALLOUT
- onig_global_callout_names_free();
-#endif
-
- onigenc_end();
-
- onig_inited = 0;
-
- return 0;
-}
-
-extern int
-onig_is_in_code_range(const UChar* p, OnigCodePoint code)
-{
- OnigCodePoint n, *data;
- OnigCodePoint low, high, x;
-
- GET_CODE_POINT(n, p);
- data = (OnigCodePoint* )p;
- data++;
-
- for (low = 0, high = n; low < high; ) {
- x = (low + high) >> 1;
- if (code > data[x * 2 + 1])
- low = x + 1;
- else
- high = x;
- }
-
- return ((low < n && code >= data[low * 2]) ? 1 : 0);
-}
-
-extern int
-onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_arg)
-{
- int found;
- CClassNode* cc = (CClassNode* )cc_arg;
-
- if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
- if (IS_NULL(cc->mbuf)) {
- found = 0;
- }
- else {
- found = onig_is_in_code_range(cc->mbuf->p, code) != 0;
- }
- }
- else {
- found = BITSET_AT(cc->bs, code) != 0;
- }
-
- if (IS_NCCLASS_NOT(cc))
- return !found;
- else
- return found;
-}
-
-extern int
-onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
-{
- int len;
-
- if (ONIGENC_MBC_MINLEN(enc) > 1) {
- len = 2;
- }
- else {
- len = ONIGENC_CODE_TO_MBCLEN(enc, code);
- if (len < 0) return 0;
- }
- return onig_is_code_in_cc_len(len, code, cc);
-}
-
-
-#ifdef ONIG_DEBUG_PARSE
-
-static void
-p_string(FILE* f, int len, UChar* s)
-{
- fputs(":", f);
- while (len-- > 0) { fputc(*s++, f); }
-}
-
-static void
-Indent(FILE* f, int indent)
-{
- int i;
- for (i = 0; i < indent; i++) putc(' ', f);
-}
-
-static void
-print_indent_tree(FILE* f, Node* node, int indent)
-{
- int i;
- NodeType type;
- UChar* p;
- int add = 3;
-
- Indent(f, indent);
- if (IS_NULL(node)) {
- fprintf(f, "ERROR: null node!!!\n");
- exit (0);
- }
-
- type = NODE_TYPE(node);
- switch (type) {
- case NODE_LIST:
- case NODE_ALT:
- if (type == NODE_LIST)
- fprintf(f, "<list:%p>\n", node);
- else
- fprintf(f, "<alt:%p>\n", node);
-
- print_indent_tree(f, NODE_CAR(node), indent + add);
- while (IS_NOT_NULL(node = NODE_CDR(node))) {
- if (NODE_TYPE(node) != type) {
- fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NODE_TYPE(node));
- exit(0);
- }
- print_indent_tree(f, NODE_CAR(node), indent + add);
- }
- break;
-
- case NODE_STRING:
- {
- char* mode;
- char* dont;
- char* good;
-
- if (NODE_STRING_IS_RAW(node))
- mode = "-raw";
- else if (NODE_STRING_IS_AMBIG(node))
- mode = "-ambig";
- else
- mode = "";
-
- if (NODE_STRING_IS_GOOD_AMBIG(node))
- good = "-good";
- else
- good = "";
-
- if (NODE_STRING_IS_DONT_GET_OPT_INFO(node))
- dont = " (dont-opt)";
- else
- dont = "";
-
- fprintf(f, "<string%s%s%s:%p>", mode, good, dont, node);
- for (p = STR_(node)->s; p < STR_(node)->end; p++) {
- if (*p >= 0x20 && *p < 0x7f)
- fputc(*p, f);
- else {
- fprintf(f, " 0x%02x", *p);
- }
- }
- }
- break;
-
- case NODE_CCLASS:
- fprintf(f, "<cclass:%p>", node);
- if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f);
- if (CCLASS_(node)->mbuf) {
- BBuf* bbuf = CCLASS_(node)->mbuf;
- for (i = 0; i < bbuf->used; i++) {
- if (i > 0) fprintf(f, ",");
- fprintf(f, "%0x", bbuf->p[i]);
- }
- }
- break;
-
- case NODE_CTYPE:
- fprintf(f, "<ctype:%p> ", node);
- switch (CTYPE_(node)->ctype) {
- case CTYPE_ANYCHAR:
- fprintf(f, "<anychar:%p>", node);
- break;
-
- case ONIGENC_CTYPE_WORD:
- if (CTYPE_(node)->not != 0)
- fputs("not word", f);
- else
- fputs("word", f);
-
- if (CTYPE_(node)->ascii_mode != 0)
- fputs(" (ascii)", f);
-
- break;
-
- default:
- fprintf(f, "ERROR: undefined ctype.\n");
- exit(0);
- }
- break;
-
- case NODE_ANCHOR:
- fprintf(f, "<anchor:%p> ", node);
- switch (ANCHOR_(node)->type) {
- case ANCR_BEGIN_BUF: fputs("begin buf", f); break;
- case ANCR_END_BUF: fputs("end buf", f); break;
- case ANCR_BEGIN_LINE: fputs("begin line", f); break;
- case ANCR_END_LINE: fputs("end line", f); break;
- case ANCR_SEMI_END_BUF: fputs("semi end buf", f); break;
- case ANCR_BEGIN_POSITION: fputs("begin position", f); break;
-
- case ANCR_WORD_BOUNDARY: fputs("word boundary", f); break;
- case ANCR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;
-#ifdef USE_WORD_BEGIN_END
- case ANCR_WORD_BEGIN: fputs("word begin", f); break;
- case ANCR_WORD_END: fputs("word end", f); break;
-#endif
- case ANCR_TEXT_SEGMENT_BOUNDARY:
- fputs("text-segment boundary", f); break;
- case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
- fputs("no text-segment boundary", f); break;
- case ANCR_PREC_READ:
- fprintf(f, "prec read\n");
- print_indent_tree(f, NODE_BODY(node), indent + add);
- break;
- case ANCR_PREC_READ_NOT:
- fprintf(f, "prec read not\n");
- print_indent_tree(f, NODE_BODY(node), indent + add);
- break;
- case ANCR_LOOK_BEHIND:
- fprintf(f, "look behind\n");
- print_indent_tree(f, NODE_BODY(node), indent + add);
- break;
- case ANCR_LOOK_BEHIND_NOT:
- fprintf(f, "look behind not\n");
- print_indent_tree(f, NODE_BODY(node), indent + add);
- break;
-
- default:
- fprintf(f, "ERROR: undefined anchor type.\n");
- break;
- }
- break;
-
- case NODE_BACKREF:
- {
- int* p;
- BackRefNode* br = BACKREF_(node);
- p = BACKREFS_P(br);
- fprintf(f, "<backref%s:%p>", NODE_IS_CHECKER(node) ? "-checker" : "", node);
- for (i = 0; i < br->back_num; i++) {
- if (i > 0) fputs(", ", f);
- fprintf(f, "%d", p[i]);
- }
- }
- break;
-
-#ifdef USE_CALL
- case NODE_CALL:
- {
- CallNode* cn = CALL_(node);
- fprintf(f, "<call:%p>", node);
- p_string(f, cn->name_end - cn->name, cn->name);
- }
- break;
-#endif
-
- case NODE_QUANT:
- fprintf(f, "<quantifier:%p>{%d,%d}%s\n", node,
- QUANT_(node)->lower, QUANT_(node)->upper,
- (QUANT_(node)->greedy ? "" : "?"));
- print_indent_tree(f, NODE_BODY(node), indent + add);
- break;
-
- case NODE_BAG:
- fprintf(f, "<bag:%p> ", node);
- switch (BAG_(node)->type) {
- case BAG_OPTION:
- fprintf(f, "option:%d", BAG_(node)->o.options);
- break;
- case BAG_MEMORY:
- fprintf(f, "memory:%d", BAG_(node)->m.regnum);
- break;
- case BAG_STOP_BACKTRACK:
- fprintf(f, "stop-bt");
- break;
- case BAG_IF_ELSE:
- fprintf(f, "if-else");
- break;
- }
- fprintf(f, "\n");
- print_indent_tree(f, NODE_BODY(node), indent + add);
- break;
-
- case NODE_GIMMICK:
- fprintf(f, "<gimmick:%p> ", node);
- switch (GIMMICK_(node)->type) {
- case GIMMICK_FAIL:
- fprintf(f, "fail");
- break;
- case GIMMICK_SAVE:
- fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
- break;
- case GIMMICK_UPDATE_VAR:
- fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
- break;
-#ifdef USE_CALLOUT
- case GIMMICK_CALLOUT:
- switch (GIMMICK_(node)->detail_type) {
- case ONIG_CALLOUT_OF_CONTENTS:
- fprintf(f, "callout:contents:%d", GIMMICK_(node)->num);
- break;
- case ONIG_CALLOUT_OF_NAME:
- fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num);
- break;
- }
-#endif
- }
- break;
-
- default:
- fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node));
- break;
- }
-
- if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT &&
- type != NODE_BAG)
- fprintf(f, "\n");
- fflush(f);
-}
-
-static void
-print_tree(FILE* f, Node* node)
-{
- print_indent_tree(f, node, 0);
-}
-#endif
diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.c
deleted file mode 100644
index 079cd01f29f7..000000000000
--- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regenc.c
+++ /dev/null
@@ -1,1029 +0,0 @@
-/**********************************************************************
- regenc.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "regint.h"
-
-OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
-
-#define INITED_LIST_SIZE 20
-
-static int InitedListNum;
-
-static struct {
- OnigEncoding enc;
- int inited;
-} InitedList[INITED_LIST_SIZE];
-
-static int
-enc_inited_entry(OnigEncoding enc)
-{
- int i;
-
- for (i = 0; i < InitedListNum; i++) {
- if (InitedList[i].enc == enc) {
- InitedList[i].inited = 1;
- return i;
- }
- }
-
- i = InitedListNum;
- if (i < INITED_LIST_SIZE - 1) {
- InitedList[i].enc = enc;
- InitedList[i].inited = 1;
- InitedListNum++;
- return i;
- }
-
- return -1;
-}
-
-static int
-enc_is_inited(OnigEncoding enc)
-{
- int i;
-
- for (i = 0; i < InitedListNum; i++) {
- if (InitedList[i].enc == enc) {
- return InitedList[i].inited;
- }
- }
-
- return 0;
-}
-
-static int OnigEncInited;
-
-extern int
-onigenc_init(void)
-{
- if (OnigEncInited != 0) return 0;
-
- OnigEncInited = 1;
- return 0;
-}
-
-extern int
-onigenc_end(void)
-{
- int i;
-
- for (i = 0; i < InitedListNum; i++) {
- InitedList[i].enc = 0;
- InitedList[i].inited = 0;
- }
- InitedListNum = 0;
-
- OnigEncInited = 0;
- return ONIG_NORMAL;
-}
-
-extern int
-onig_initialize_encoding(OnigEncoding enc)
-{
- int r;
-
- if (enc != ONIG_ENCODING_ASCII &&
- ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
- OnigEncoding ascii = ONIG_ENCODING_ASCII;
- if (ascii->init != 0 && enc_is_inited(ascii) == 0) {
- r = ascii->init();
- if (r != ONIG_NORMAL) return r;
- enc_inited_entry(ascii);
- }
- }
-
- if (enc->init != 0 &&
- enc_is_inited(enc) == 0) {
- r = (enc->init)();
- if (r == ONIG_NORMAL)
- enc_inited_entry(enc);
- return r;
- }
-
- return 0;
-}
-
-extern OnigEncoding
-onigenc_get_default_encoding(void)
-{
- return OnigEncDefaultCharEncoding;
-}
-
-extern int
-onigenc_set_default_encoding(OnigEncoding enc)
-{
- OnigEncDefaultCharEncoding = enc;
- return 0;
-}
-
-extern UChar*
-onigenc_strdup(OnigEncoding enc, const UChar* s, const UChar* end)
-{
- int slen, term_len, i;
- UChar *r;
-
- slen = (int )(end - s);
- term_len = ONIGENC_MBC_MINLEN(enc);
-
- r = (UChar* )xmalloc(slen + term_len);
- CHECK_NULL_RETURN(r);
- xmemcpy(r, s, slen);
-
- for (i = 0; i < term_len; i++)
- r[slen + i] = (UChar )0;
-
- return r;
-}
-
-extern UChar*
-onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
-{
- UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
- if (p < s) {
- p += enclen(enc, p);
- }
- return p;
-}
-
-extern UChar*
-onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
- const UChar* start, const UChar* s, const UChar** prev)
-{
- UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
-
- if (p < s) {
- if (prev) *prev = (const UChar* )p;
- p += enclen(enc, p);
- }
- else {
- if (prev) *prev = (const UChar* )NULL; /* Sorry */
- }
- return p;
-}
-
-extern UChar*
-onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
-{
- if (s <= start)
- return (UChar* )NULL;
-
- return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
-}
-
-extern UChar*
-onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
-{
- while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
- if (s <= start)
- return (UChar* )NULL;
-
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
- }
- return (UChar* )s;
-}
-
-#if 0
-extern int
-onigenc_mbc_enc_len_end(OnigEncoding enc, const UChar* p, const UChar* end)
-{
- int len;
- int n;
-
- len = ONIGENC_MBC_ENC_LEN(enc, p);
- n = (int )(end - p);
-
- return (n < len ? n : len);
-}
-#endif
-
-extern UChar*
-onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
-{
- UChar* q = (UChar* )p;
- while (n-- > 0) {
- q += ONIGENC_MBC_ENC_LEN(enc, q);
- }
- return (q <= end ? q : NULL);
-}
-
-extern int
-onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
-{
- int n = 0;
- UChar* q = (UChar* )p;
-
- while (q < end) {
- q += ONIGENC_MBC_ENC_LEN(enc, q);
- n++;
- }
- return n;
-}
-
-extern int
-onigenc_strlen_null(OnigEncoding enc, const UChar* s)
-{
- int n = 0;
- UChar* p = (UChar* )s;
-
- while (1) {
- if (*p == '\0') {
- UChar* q;
- int len = ONIGENC_MBC_MINLEN(enc);
-
- if (len == 1) return n;
- q = p + 1;
- while (len > 1) {
- if (*q != '\0') break;
- q++;
- len--;
- }
- if (len == 1) return n;
- }
- p += ONIGENC_MBC_ENC_LEN(enc, p);
- n++;
- }
-}
-
-extern int
-onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
-{
- UChar* start = (UChar* )s;
- UChar* p = (UChar* )s;
-
- while (1) {
- if (*p == '\0') {
- UChar* q;
- int len = ONIGENC_MBC_MINLEN(enc);
-
- if (len == 1) return (int )(p - start);
- q = p + 1;
- while (len > 1) {
- if (*q != '\0') break;
- q++;
- len--;
- }
- if (len == 1) return (int )(p - start);
- }
- p += ONIGENC_MBC_ENC_LEN(enc, p);
- }
-}
-
-const UChar OnigEncAsciiToLowerCaseTable[] = {
- '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
- '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
- '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
- '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
- '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
- '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
- '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
- '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
- '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
- '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
- '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
- '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
- '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
- '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
- '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
- '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
- '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
- '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
- '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
- '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
- '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
- '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
- '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
- '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
- '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
- '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
- '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
- '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
- '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
- '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
- '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
- '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
-};
-
-#ifdef USE_UPPER_CASE_TABLE
-const UChar OnigEncAsciiToUpperCaseTable[256] = {
- '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
- '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
- '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
- '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
- '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
- '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
- '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
- '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
- '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
- '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
- '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
- '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
- '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
- '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
- '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
- '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
- '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
- '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
- '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
- '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
- '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
- '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
- '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
- '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
- '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
- '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
- '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
- '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
- '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
- '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
- '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
- '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
-};
-#endif
-
-const unsigned short OnigEncAsciiCtypeTable[256] = {
- 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
- 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
- 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
- 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
- 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
- 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
- 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
- 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
- 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
- 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
- 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
- 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
- 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
- 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
- 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
- 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
-};
-
-const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
- '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
- '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
- '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
- '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
- '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
- '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
- '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
- '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
- '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
- '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
- '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
- '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
- '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
- '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
- '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
- '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
- '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
- '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
- '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
- '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
- '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
- '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
- '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
- '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
- '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
- '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
- '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
- '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
- '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
- '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
- '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
- '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
-};
-
-#ifdef USE_UPPER_CASE_TABLE
-const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
- '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
- '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
- '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
- '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
- '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
- '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
- '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
- '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
- '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
- '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
- '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
- '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
- '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
- '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
- '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
- '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
- '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
- '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
- '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
- '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
- '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
- '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
- '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
- '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
- '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
- '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
- '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
- '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
- '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
- '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
- '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
- '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
-};
-#endif
-
-extern void
-onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
-{
- /* nothing */
- /* obsoleted. */
-}
-
-extern UChar*
-onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
-{
- return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
-}
-
-const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
- { 0x41, 0x61 },
- { 0x42, 0x62 },
- { 0x43, 0x63 },
- { 0x44, 0x64 },
- { 0x45, 0x65 },
- { 0x46, 0x66 },
- { 0x47, 0x67 },
- { 0x48, 0x68 },
- { 0x49, 0x69 },
- { 0x4a, 0x6a },
- { 0x4b, 0x6b },
- { 0x4c, 0x6c },
- { 0x4d, 0x6d },
- { 0x4e, 0x6e },
- { 0x4f, 0x6f },
- { 0x50, 0x70 },
- { 0x51, 0x71 },
- { 0x52, 0x72 },
- { 0x53, 0x73 },
- { 0x54, 0x74 },
- { 0x55, 0x75 },
- { 0x56, 0x76 },
- { 0x57, 0x77 },
- { 0x58, 0x78 },
- { 0x59, 0x79 },
- { 0x5a, 0x7a }
-};
-
-extern int
-onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
- OnigApplyAllCaseFoldFunc f, void* arg)
-{
- OnigCodePoint code;
- int i, r;
-
- for (i = 0;
- i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
- i++) {
- code = OnigAsciiLowerMap[i].to;
- r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
- if (r != 0) return r;
-
- code = OnigAsciiLowerMap[i].from;
- r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
- if (r != 0) return r;
- }
-
- return 0;
-}
-
-extern int
-onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
- const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
- OnigCaseFoldCodeItem items[])
-{
- if (0x41 <= *p && *p <= 0x5a) {
- items[0].byte_len = 1;
- items[0].code_len = 1;
- items[0].code[0] = (OnigCodePoint )(*p + 0x20);
- return 1;
- }
- else if (0x61 <= *p && *p <= 0x7a) {
- items[0].byte_len = 1;
- items[0].code_len = 1;
- items[0].code[0] = (OnigCodePoint )(*p - 0x20);
- return 1;
- }
- else
- return 0;
-}
-
-static int
-ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
- OnigApplyAllCaseFoldFunc f, void* arg)
-{
- static OnigCodePoint ss[] = { 0x73, 0x73 };
-
- return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
-}
-
-extern int
-onigenc_apply_all_case_fold_with_map(int map_size,
- const OnigPairCaseFoldCodes map[],
- int ess_tsett_flag, OnigCaseFoldType flag,
- OnigApplyAllCaseFoldFunc f, void* arg)
-{
- OnigCodePoint code;
- int i, r;
-
- r = onigenc_ascii_apply_all_case_fold(flag, f, arg);
- if (r != 0) return r;
-
- for (i = 0; i < map_size; i++) {
- code = map[i].to;
- r = (*f)(map[i].from, &code, 1, arg);
- if (r != 0) return r;
-
- code = map[i].from;
- r = (*f)(map[i].to, &code, 1, arg);
- if (r != 0) return r;
- }
-
- if (ess_tsett_flag != 0)
- return ss_apply_all_case_fold(flag, f, arg);
-
- return 0;
-}
-
-extern int
-onigenc_get_case_fold_codes_by_str_with_map(int map_size,
- const OnigPairCaseFoldCodes map[],
- int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
- const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
-{
- if (0x41 <= *p && *p <= 0x5a) {
- items[0].byte_len = 1;
- items[0].code_len = 1;
- items[0].code[0] = (OnigCodePoint )(*p + 0x20);
- if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
- && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
- /* SS */
- items[1].byte_len = 2;
- items[1].code_len = 1;
- items[1].code[0] = (OnigCodePoint )0xdf;
- return 2;
- }
- else
- return 1;
- }
- else if (0x61 <= *p && *p <= 0x7a) {
- items[0].byte_len = 1;
- items[0].code_len = 1;
- items[0].code[0] = (OnigCodePoint )(*p - 0x20);
- if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
- && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
- /* ss */
- items[1].byte_len = 2;
- items[1].code_len = 1;
- items[1].code[0] = (OnigCodePoint )0xdf;
- return 2;
- }
- else
- return 1;
- }
- else if (*p == 0xdf && ess_tsett_flag != 0) {
- items[0].byte_len = 1;
- items[0].code_len = 2;
- items[0].code[0] = (OnigCodePoint )'s';
- items[0].code[1] = (OnigCodePoint )'s';
-
- items[1].byte_len = 1;
- items[1].code_len = 2;
- items[1].code[0] = (OnigCodePoint )'S';
- items[1].code[1] = (OnigCodePoint )'S';
-
- items[2].byte_len = 1;
- items[2].code_len = 2;
- items[2].code[0] = (OnigCodePoint )'s';
- items[2].code[1] = (OnigCodePoint )'S';
-
- items[3].byte_len = 1;
- items[3].code_len = 2;
- items[3].code[0] = (OnigCodePoint )'S';
- items[3].code[1] = (OnigCodePoint )'s';
-
- return 4;
- }
- else {
- int i;
-
- for (i = 0; i < map_size; i++) {
- if (*p == map[i].from) {
- items[0].byte_len = 1;
- items[0].code_len = 1;
- items[0].code[0] = map[i].to;
- return 1;
- }
- else if (*p == map[i].to) {
- items[0].byte_len = 1;
- items[0].code_len = 1;
- items[0].code[0] = map[i].from;
- return 1;
- }
- }
- }
-
- return 0;
-}
-
-
-extern int
-onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
- OnigCodePoint* sb_out ARG_UNUSED,
- const OnigCodePoint* ranges[] ARG_UNUSED)
-{
- return ONIG_NO_SUPPORT_CONFIG;
-}
-
-extern int
-onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
-{
- if (p < end) {
- if (*p == 0x0a) return 1;
- }
- return 0;
-}
-
-/* for single byte encodings */
-extern int
-onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
- const UChar*end ARG_UNUSED, UChar* lower)
-{
- *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
-
- (*p)++;
- return 1; /* return byte length of converted char to lower */
-}
-
-#if 0
-extern int
-onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
-{
- const UChar* p = *pp;
-
- (*pp)++;
- return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
-}
-#endif
-
-extern int
-onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
-{
- return 1;
-}
-
-extern OnigCodePoint
-onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
-{
- return (OnigCodePoint )(*p);
-}
-
-extern int
-onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
-{
- return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);
-}
-
-extern int
-onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
-{
- *buf = (UChar )(code & 0xff);
- return 1;
-}
-
-extern UChar*
-onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
- const UChar* s)
-{
- return (UChar* )s;
-}
-
-extern int
-onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
- const UChar* end ARG_UNUSED)
-{
- return TRUE;
-}
-
-extern int
-onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
- const UChar* end ARG_UNUSED)
-{
- return FALSE;
-}
-
-extern int
-onigenc_always_true_is_valid_mbc_string(const UChar* s ARG_UNUSED,
- const UChar* end ARG_UNUSED)
-{
- return TRUE;
-}
-
-extern int
-onigenc_length_check_is_valid_mbc_string(OnigEncoding enc,
- const UChar* p, const UChar* end)
-{
- while (p < end) {
- p += enclen(enc, p);
- }
-
- if (p != end)
- return FALSE;
- else
- return TRUE;
-}
-
-extern int
-onigenc_is_valid_mbc_string(OnigEncoding enc, const UChar* s, const UChar* end)
-{
- return ONIGENC_IS_VALID_MBC_STRING(enc, s, end);
-}
-
-extern OnigCodePoint
-onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
-{
- int c, i, len;
- OnigCodePoint n;
-
- len = enclen(enc, p);
- n = (OnigCodePoint )(*p++);
- if (len == 1) return n;
-
- for (i = 1; i < len; i++) {
- if (p >= end) break;
- c = *p++;
- n <<= 8; n += c;
- }
- return n;
-}
-
-extern int
-onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
- const UChar** pp, const UChar* end ARG_UNUSED,
- UChar* lower)
-{
- int len;
- const UChar *p = *pp;
-
- if (ONIGENC_IS_MBC_ASCII(p)) {
- *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
- (*pp)++;
- return 1;
- }
- else {
- int i;
-
- len = enclen(enc, p);
- for (i = 0; i < len; i++) {
- *lower++ = *p++;
- }
- (*pp) += len;
- return len; /* return byte length of converted to lower char */
- }
-}
-
-#if 0
-extern int
-onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
-{
- const UChar* p = *pp;
-
- if (ONIGENC_IS_MBC_ASCII(p)) {
- (*pp)++;
- return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
- }
-
- (*pp) += enclen(enc, p);
- return FALSE;
-}
-#endif
-
-extern int
-onigenc_mb2_code_to_mbclen(OnigCodePoint code)
-{
- if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
-
- if ((code & 0xff00) != 0) return 2;
- else return 1;
-}
-
-extern int
-onigenc_mb4_code_to_mbclen(OnigCodePoint code)
-{
- if ((code & 0xff000000) != 0) return 4;
- else if ((code & 0xff0000) != 0) return 3;
- else if ((code & 0xff00) != 0) return 2;
- else return 1;
-}
-
-extern int
-onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
-{
- UChar *p = buf;
-
- if ((code & 0xff00) != 0) {
- *p++ = (UChar )((code >> 8) & 0xff);
- }
- *p++ = (UChar )(code & 0xff);
-
-#if 1
- if (enclen(enc, buf) != (p - buf))
- return ONIGERR_INVALID_CODE_POINT_VALUE;
-#endif
- return (int )(p - buf);
-}
-
-extern int
-onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
-{
- UChar *p = buf;
-
- if ((code & 0xff000000) != 0) {
- *p++ = (UChar )((code >> 24) & 0xff);
- }
- if ((code & 0xff0000) != 0 || p != buf) {
- *p++ = (UChar )((code >> 16) & 0xff);
- }
- if ((code & 0xff00) != 0 || p != buf) {
- *p++ = (UChar )((code >> 8) & 0xff);
- }
- *p++ = (UChar )(code & 0xff);
-
-#if 1
- if (enclen(enc, buf) != (p - buf))
- return ONIGERR_INVALID_CODE_POINT_VALUE;
-#endif
- return (int )(p - buf);
-}
-
-extern int
-onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
-{
- static PosixBracketEntryType PBS[] = {
- { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 },
- { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 },
- { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 },
- { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 },
- { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 },
- { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 },
- { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 },
- { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 },
- { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 },
- { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 },
- { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 },
- { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
- { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 },
- { (UChar* )"Word", ONIGENC_CTYPE_WORD, 4 },
- { (UChar* )NULL, -1, 0 }
- };
-
- PosixBracketEntryType *pb;
- int len;
-
- len = onigenc_strlen(enc, p, end);
- for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
- if (len == pb->len &&
- onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
- return pb->ctype;
- }
-
- return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
-}
-
-extern int
-onigenc_is_mbc_word_ascii(OnigEncoding enc, UChar* s, const UChar* end)
-{
- OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, s, end);
-
- if (code > 127) return 0;
-
- return ONIGENC_IS_ASCII_CODE_WORD(code);
-}
-
-extern int
-onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
- unsigned int ctype)
-{
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else {
- if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
- return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
- }
- }
-
- return FALSE;
-}
-
-extern int
-onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
- unsigned int ctype)
-{
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else {
- if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
- return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
- }
- }
-
- return FALSE;
-}
-
-extern int
-onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
- const UChar* sascii /* ascii */, int n)
-{
- int x, c;
-
- while (n-- > 0) {
- if (p >= end) return (int )(*sascii);
-
- c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
- x = *sascii - c;
- if (x) return x;
-
- sascii++;
- p += enclen(enc, p);
- }
- return 0;
-}
-
-extern int
-onig_codes_cmp(OnigCodePoint a[], OnigCodePoint b[], int n)
-{
- int i;
-
- for (i = 0; i < n; i++) {
- if (a[i] != b[i])
- return -1;
- }
-
- return 0;
-}
-
-extern int
-onig_codes_byte_at(OnigCodePoint codes[], int at)
-{
- int index;
- int b;
- OnigCodePoint code;
-
- index = at / 3;
- b = at % 3;
- code = codes[index];
-
- return ((code >> ((2 - b) * 8)) & 0xff);
-}
diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regerror.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regerror.c
deleted file mode 100644
index 0dca34a8ad29..000000000000
--- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regerror.c
+++ /dev/null
@@ -1,402 +0,0 @@
-/**********************************************************************
- regerror.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "regint.h"
-#if 0
-#include <stdio.h> /* for vsnprintf() */
-
-#include <stdarg.h>
-#endif
-
-extern UChar*
-onig_error_code_to_format(int code)
-{
- char *p;
-
- switch (code) {
- case ONIG_MISMATCH:
- p = "mismatch"; break;
- case ONIG_NO_SUPPORT_CONFIG:
- p = "no support in this configuration"; break;
- case ONIG_ABORT:
- p = "abort"; break;
- case ONIGERR_MEMORY:
- p = "fail to memory allocation"; break;
- case ONIGERR_MATCH_STACK_LIMIT_OVER:
- p = "match-stack limit over"; break;
- case ONIGERR_PARSE_DEPTH_LIMIT_OVER:
- p = "parse depth limit over"; break;
- case ONIGERR_RETRY_LIMIT_IN_MATCH_OVER:
- p = "retry-limit-in-match over"; break;
- case ONIGERR_TYPE_BUG:
- p = "undefined type (bug)"; break;
- case ONIGERR_PARSER_BUG:
- p = "internal parser error (bug)"; break;
- case ONIGERR_STACK_BUG:
- p = "stack error (bug)"; break;
- case ONIGERR_UNDEFINED_BYTECODE:
- p = "undefined bytecode (bug)"; break;
- case ONIGERR_UNEXPECTED_BYTECODE:
- p = "unexpected bytecode (bug)"; break;
- case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED:
- p = "default multibyte-encoding is not setted"; break;
- case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
- p = "can't convert to wide-char on specified multibyte-encoding"; break;
- case ONIGERR_FAIL_TO_INITIALIZE:
- p = "fail to initialize"; break;
- case ONIGERR_INVALID_ARGUMENT:
- p = "invalid argument"; break;
- case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
- p = "end pattern at left brace"; break;
- case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
- p = "end pattern at left bracket"; break;
- case ONIGERR_EMPTY_CHAR_CLASS:
- p = "empty char-class"; break;
- case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
- p = "premature end of char-class"; break;
- case ONIGERR_END_PATTERN_AT_ESCAPE:
- p = "end pattern at escape"; break;
- case ONIGERR_END_PATTERN_AT_META:
- p = "end pattern at meta"; break;
- case ONIGERR_END_PATTERN_AT_CONTROL:
- p = "end pattern at control"; break;
- case ONIGERR_META_CODE_SYNTAX:
- p = "invalid meta-code syntax"; break;
- case ONIGERR_CONTROL_CODE_SYNTAX:
- p = "invalid control-code syntax"; break;
- case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
- p = "char-class value at end of range"; break;
- case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
- p = "char-class value at start of range"; break;
- case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
- p = "unmatched range specifier in char-class"; break;
- case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
- p = "target of repeat operator is not specified"; break;
- case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
- p = "target of repeat operator is invalid"; break;
- case ONIGERR_NESTED_REPEAT_OPERATOR:
- p = "nested repeat operator"; break;
- case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
- p = "unmatched close parenthesis"; break;
- case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
- p = "end pattern with unmatched parenthesis"; break;
- case ONIGERR_END_PATTERN_IN_GROUP:
- p = "end pattern in group"; break;
- case ONIGERR_UNDEFINED_GROUP_OPTION:
- p = "undefined group option"; break;
- case ONIGERR_INVALID_POSIX_BRACKET_TYPE:
- p = "invalid POSIX bracket type"; break;
- case ONIGERR_INVALID_LOOK_BEHIND_PATTERN:
- p = "invalid pattern in look-behind"; break;
- case ONIGERR_INVALID_REPEAT_RANGE_PATTERN:
- p = "invalid repeat range {lower,upper}"; break;
- case ONIGERR_TOO_BIG_NUMBER:
- p = "too big number"; break;
- case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
- p = "too big number for repeat range"; break;
- case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
- p = "upper is smaller than lower in repeat range"; break;
- case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
- p = "empty range in char class"; break;
- case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
- p = "mismatch multibyte code length in char-class range"; break;
- case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
- p = "too many multibyte code ranges are specified"; break;
- case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
- p = "too short multibyte code string"; break;
- case ONIGERR_TOO_BIG_BACKREF_NUMBER:
- p = "too big backref number"; break;
- case ONIGERR_INVALID_BACKREF:
- p = "invalid backref number/name"; break;
- case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
- p = "numbered backref/call is not allowed. (use name)"; break;
- case ONIGERR_TOO_MANY_CAPTURES:
- p = "too many captures"; break;
- case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
- p = "too big wide-char value"; break;
- case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
- p = "too long wide-char value"; break;
- case ONIGERR_INVALID_CODE_POINT_VALUE:
- p = "invalid code point value"; break;
- case ONIGERR_EMPTY_GROUP_NAME:
- p = "group name is empty"; break;
- case ONIGERR_INVALID_GROUP_NAME:
- p = "invalid group name <%n>"; break;
- case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
- p = "invalid char in group name <%n>"; break;
- case ONIGERR_UNDEFINED_NAME_REFERENCE:
- p = "undefined name <%n> reference"; break;
- case ONIGERR_UNDEFINED_GROUP_REFERENCE:
- p = "undefined group <%n> reference"; break;
- case ONIGERR_MULTIPLEX_DEFINED_NAME:
- p = "multiplex defined name <%n>"; break;
- case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
- p = "multiplex definition name <%n> call"; break;
- case ONIGERR_NEVER_ENDING_RECURSION:
- p = "never ending recursion"; break;
- case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
- p = "group number is too big for capture history"; break;
- case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
- p = "invalid character property name {%n}"; break;
- case ONIGERR_INVALID_IF_ELSE_SYNTAX:
- p = "invalid if-else syntax"; break;
- case ONIGERR_INVALID_ABSENT_GROUP_PATTERN:
- p = "invalid absent group pattern"; break;
- case ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN:
- p = "invalid absent group generator pattern"; break;
- case ONIGERR_INVALID_CALLOUT_PATTERN:
- p = "invalid callout pattern"; break;
- case ONIGERR_INVALID_CALLOUT_NAME:
- p = "invalid callout name"; break;
- case ONIGERR_UNDEFINED_CALLOUT_NAME:
- p = "undefined callout name"; break;
- case ONIGERR_INVALID_CALLOUT_BODY:
- p = "invalid callout body"; break;
- case ONIGERR_INVALID_CALLOUT_TAG_NAME:
- p = "invalid callout tag name"; break;
- case ONIGERR_INVALID_CALLOUT_ARG:
- p = "invalid callout arg"; break;
- case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
- p = "not supported encoding combination"; break;
- case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
- p = "invalid combination of options"; break;
- case ONIGERR_LIBRARY_IS_NOT_INITIALIZED:
- p = "library is not initialized"; break;
-
- default:
- p = "undefined error code"; break;
- }
-
- return (UChar* )p;
-}
-
-static void sprint_byte(char* s, unsigned int v)
-{
- sprintf_s(s, sizeof("00"), "%02x", (v & 0377));
-}
-
-static void sprint_byte_with_x(char* s, unsigned int v)
-{
- sprintf_s(s, sizeof("\\x00"), "\\x%02x", (v & 0377));
-}
-
-static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
- UChar buf[], int buf_size, int *is_over)
-{
- int len;
- UChar *p;
- OnigCodePoint code;
-
- if (!s) {
- len = 0;
- *is_over = 0;
- }
- else if (ONIGENC_MBC_MINLEN(enc) > 1) {
- p = s;
- len = 0;
- while (p < end) {
- code = ONIGENC_MBC_TO_CODE(enc, p, end);
- if (code >= 0x80) {
- if (code > 0xffff && len + 10 <= buf_size) {
- sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24));
- sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16));
- sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8));
- sprint_byte((char*)(&(buf[len+8])), (unsigned int)code);
- len += 10;
- }
- else if (len + 6 <= buf_size) {
- sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8));
- sprint_byte((char*)(&(buf[len+4])), (unsigned int)code);
- len += 6;
- }
- else {
- break;
- }
- }
- else {
- buf[len++] = (UChar )code;
- }
-
- p += enclen(enc, p);
- if (len >= buf_size) break;
- }
-
- *is_over = p < end;
- }
- else {
- len = MIN((int )(end - s), buf_size);
- xmemcpy(buf, s, (size_t )len);
- *is_over = ((buf_size < (end - s)) ? 1 : 0);
- }
-
- return len;
-}
-
-
-extern int
-onig_is_error_code_needs_param(int code)
-{
- switch (code) {
- case ONIGERR_UNDEFINED_NAME_REFERENCE:
- case ONIGERR_UNDEFINED_GROUP_REFERENCE:
- case ONIGERR_MULTIPLEX_DEFINED_NAME:
- case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
- case ONIGERR_INVALID_GROUP_NAME:
- case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
- case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
- return 1;
- default:
- return 0;
- }
-}
-
-/* for ONIG_MAX_ERROR_MESSAGE_LEN */
-#define MAX_ERROR_PAR_LEN 30
-
-extern int EFIAPI onig_error_code_to_str(UChar* s, int code, ...)
-{
- UChar *p, *q;
- OnigErrorInfo* einfo;
- int len, is_over;
- UChar parbuf[MAX_ERROR_PAR_LEN];
- va_list vargs;
-
- va_init_list(vargs, code);
-
- switch (code) {
- case ONIGERR_UNDEFINED_NAME_REFERENCE:
- case ONIGERR_UNDEFINED_GROUP_REFERENCE:
- case ONIGERR_MULTIPLEX_DEFINED_NAME:
- case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
- case ONIGERR_INVALID_GROUP_NAME:
- case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
- case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
- einfo = va_arg(vargs, OnigErrorInfo*);
- len = to_ascii(einfo->enc, einfo->par, einfo->par_end,
- parbuf, MAX_ERROR_PAR_LEN - 3, &is_over);
- q = onig_error_code_to_format(code);
- p = s;
- while (*q != '\0') {
- if (*q == '%') {
- q++;
- if (*q == 'n') { /* '%n': name */
- xmemcpy(p, parbuf, len);
- p += len;
- if (is_over != 0) {
- xmemcpy(p, "...", 3);
- p += 3;
- }
- q++;
- }
- else
- goto normal_char;
- }
- else {
- normal_char:
- *p++ = *q++;
- }
- }
- *p = '\0';
- len = (int )(p - s);
- break;
-
- default:
- q = onig_error_code_to_format(code);
- len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q);
- xmemcpy(s, q, len);
- s[len] = '\0';
- break;
- }
-
- va_end(vargs);
- return len;
-}
-
-
-void EFIAPI onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
- UChar* pat, UChar* pat_end, const UChar *fmt, ...)
-{
- int n, need, len;
- UChar *p, *s, *bp;
- UChar bs[6];
- va_list args;
-
- va_init_list(args, fmt);
- n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args);
- va_end(args);
-
- need = (int )(pat_end - pat) * 4 + 4;
-
- if (n + need < bufsize) {
- strcat_s((char* )buf, bufsize, ": /");
- s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
-
- p = pat;
- while (p < pat_end) {
- if (ONIGENC_IS_MBC_HEAD(enc, p)) {
- len = enclen(enc, p);
- if (ONIGENC_MBC_MINLEN(enc) == 1) {
- while (len-- > 0) *s++ = *p++;
- }
- else { /* for UTF16/32 */
- int blen;
-
- while (len-- > 0) {
- sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
- blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
- bp = bs;
- while (blen-- > 0) *s++ = *bp++;
- }
- }
- }
- else if (*p == '\\') {
- *s++ = *p++;
- len = enclen(enc, p);
- while (len-- > 0) *s++ = *p++;
- }
- else if (*p == '/') {
- *s++ = (unsigned char )'\\';
- *s++ = *p++;
- }
- else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
- !ONIGENC_IS_CODE_SPACE(enc, *p)) {
- sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
- len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
- bp = bs;
- while (len-- > 0) *s++ = *bp++;
- }
- else {
- *s++ = *p++;
- }
- }
-
- *s++ = '/';
- *s = '\0';
- }
-}
diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regexec.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regexec.c
deleted file mode 100644
index 6a7890910781..000000000000
--- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regexec.c
+++ /dev/null
@@ -1,5874 +0,0 @@
-/**********************************************************************
- regexec.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#include "regint.h"
-
-#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \
- ((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end))
-
-#ifdef USE_CRNL_AS_LINE_TERMINATOR
-#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
- (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
- ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
-#endif
-
-#define CHECK_INTERRUPT_IN_MATCH
-
-#ifdef USE_CALLOUT
-typedef struct {
- int last_match_at_call_counter;
- struct {
- OnigType type;
- OnigValue val;
- } slot[ONIG_CALLOUT_DATA_SLOT_NUM];
-} CalloutData;
-#endif
-
-struct OnigMatchParamStruct {
- unsigned int match_stack_limit;
- unsigned long retry_limit_in_match;
-#ifdef USE_CALLOUT
- OnigCalloutFunc progress_callout_of_contents;
- OnigCalloutFunc retraction_callout_of_contents;
- int match_at_call_counter;
- void* callout_user_data;
- CalloutData* callout_data;
- int callout_data_alloc_num;
-#endif
-};
-
-extern int
-onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param,
- unsigned int limit)
-{
- param->match_stack_limit = limit;
- return ONIG_NORMAL;
-}
-
-extern int
-onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param,
- unsigned long limit)
-{
- param->retry_limit_in_match = limit;
- return ONIG_NORMAL;
-}
-
-extern int
-onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
-{
-#ifdef USE_CALLOUT
- param->progress_callout_of_contents = f;
- return ONIG_NORMAL;
-#else
- return ONIG_NO_SUPPORT_CONFIG;
-#endif
-}
-
-extern int
-onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
-{
-#ifdef USE_CALLOUT
- param->retraction_callout_of_contents = f;
- return ONIG_NORMAL;
-#else
- return ONIG_NO_SUPPORT_CONFIG;
-#endif
-}
-
-extern int
-onig_set_callout_user_data_of_match_param(OnigMatchParam* param, void* user_data)
-{
-#ifdef USE_CALLOUT
- param->callout_user_data = user_data;
- return ONIG_NORMAL;
-#else
- return ONIG_NO_SUPPORT_CONFIG;
-#endif
-}
-
-
-typedef struct {
- void* stack_p;
- int stack_n;
- OnigOptionType options;
- OnigRegion* region;
- int ptr_num;
- const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
- unsigned int match_stack_limit;
- unsigned long retry_limit_in_match;
- OnigMatchParam* mp;
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
- int best_len; /* for ONIG_OPTION_FIND_LONGEST */
- UChar* best_s;
-#endif
-} MatchArg;
-
-
-#ifdef ONIG_DEBUG
-
-/* arguments type */
-typedef enum {
- ARG_SPECIAL = -1,
- ARG_NON = 0,
- ARG_RELADDR = 1,
- ARG_ABSADDR = 2,
- ARG_LENGTH = 3,
- ARG_MEMNUM = 4,
- ARG_OPTION = 5,
- ARG_MODE = 6
-} OpArgType;
-
-typedef struct {
- short int opcode;
- char* name;
-} OpInfoType;
-
-static OpInfoType OpInfo[] = {
- { OP_FINISH, "finish" },
- { OP_END, "end" },
- { OP_EXACT1, "exact1" },
- { OP_EXACT2, "exact2" },
- { OP_EXACT3, "exact3" },
- { OP_EXACT4, "exact4" },
- { OP_EXACT5, "exact5" },
- { OP_EXACTN, "exactn" },
- { OP_EXACTMB2N1, "exactmb2-n1" },
- { OP_EXACTMB2N2, "exactmb2-n2" },
- { OP_EXACTMB2N3, "exactmb2-n3" },
- { OP_EXACTMB2N, "exactmb2-n" },
- { OP_EXACTMB3N, "exactmb3n" },
- { OP_EXACTMBN, "exactmbn" },
- { OP_EXACT1_IC, "exact1-ic" },
- { OP_EXACTN_IC, "exactn-ic" },
- { OP_CCLASS, "cclass" },
- { OP_CCLASS_MB, "cclass-mb" },
- { OP_CCLASS_MIX, "cclass-mix" },
- { OP_CCLASS_NOT, "cclass-not" },
- { OP_CCLASS_MB_NOT, "cclass-mb-not" },
- { OP_CCLASS_MIX_NOT, "cclass-mix-not" },
- { OP_ANYCHAR, "anychar" },
- { OP_ANYCHAR_ML, "anychar-ml" },
- { OP_ANYCHAR_STAR, "anychar*" },
- { OP_ANYCHAR_ML_STAR, "anychar-ml*" },
- { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next" },
- { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next" },
- { OP_WORD, "word" },
- { OP_WORD_ASCII, "word-ascii" },
- { OP_NO_WORD, "not-word" },
- { OP_NO_WORD_ASCII, "not-word-ascii" },
- { OP_WORD_BOUNDARY, "word-boundary" },
- { OP_NO_WORD_BOUNDARY, "not-word-boundary" },
- { OP_WORD_BEGIN, "word-begin" },
- { OP_WORD_END, "word-end" },
- { OP_TEXT_SEGMENT_BOUNDARY, "text-segment-boundary" },
- { OP_BEGIN_BUF, "begin-buf" },
- { OP_END_BUF, "end-buf" },
- { OP_BEGIN_LINE, "begin-line" },
- { OP_END_LINE, "end-line" },
- { OP_SEMI_END_BUF, "semi-end-buf" },
- { OP_BEGIN_POSITION, "begin-position" },
- { OP_BACKREF1, "backref1" },
- { OP_BACKREF2, "backref2" },
- { OP_BACKREF_N, "backref-n" },
- { OP_BACKREF_N_IC, "backref-n-ic" },
- { OP_BACKREF_MULTI, "backref_multi" },
- { OP_BACKREF_MULTI_IC, "backref_multi-ic" },
- { OP_BACKREF_WITH_LEVEL, "backref_with_level" },
- { OP_BACKREF_WITH_LEVEL_IC, "backref_with_level-c" },
- { OP_BACKREF_CHECK, "backref_check" },
- { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level" },
- { OP_MEMORY_START_PUSH, "mem-start-push" },
- { OP_MEMORY_START, "mem-start" },
- { OP_MEMORY_END_PUSH, "mem-end-push" },
- { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec" },
- { OP_MEMORY_END, "mem-end" },
- { OP_MEMORY_END_REC, "mem-end-rec" },
- { OP_FAIL, "fail" },
- { OP_JUMP, "jump" },
- { OP_PUSH, "push" },
- { OP_PUSH_SUPER, "push-super" },
- { OP_POP_OUT, "pop-out" },
-#ifdef USE_OP_PUSH_OR_JUMP_EXACT
- { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1" },
-#endif
- { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next" },
- { OP_REPEAT, "repeat" },
- { OP_REPEAT_NG, "repeat-ng" },
- { OP_REPEAT_INC, "repeat-inc" },
- { OP_REPEAT_INC_NG, "repeat-inc-ng" },
- { OP_REPEAT_INC_SG, "repeat-inc-sg" },
- { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg" },
- { OP_EMPTY_CHECK_START, "empty-check-start" },
- { OP_EMPTY_CHECK_END, "empty-check-end" },
- { OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst" },
- { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push" },
- { OP_PREC_READ_START, "push-pos" },
- { OP_PREC_READ_END, "pop-pos" },
- { OP_PREC_READ_NOT_START, "prec-read-not-start" },
- { OP_PREC_READ_NOT_END, "prec-read-not-end" },
- { OP_ATOMIC_START, "atomic-start" },
- { OP_ATOMIC_END, "atomic-end" },
- { OP_LOOK_BEHIND, "look-behind" },
- { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start" },
- { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end" },
- { OP_CALL, "call" },
- { OP_RETURN, "return" },
- { OP_PUSH_SAVE_VAL, "push-save-val" },
- { OP_UPDATE_VAR, "update-var" },
-#ifdef USE_CALLOUT
- { OP_CALLOUT_CONTENTS, "callout-contents" },
- { OP_CALLOUT_NAME, "callout-name" },
-#endif
- { -1, "" }
-};
-
-static char*
-op2name(int opcode)
-{
- int i;
-
- for (i = 0; OpInfo[i].opcode >= 0; i++) {
- if (opcode == OpInfo[i].opcode) return OpInfo[i].name;
- }
-
- return "";
-}
-
-static void
-p_string(FILE* f, int len, UChar* s)
-{
- fputs(":", f);
- while (len-- > 0) { fputc(*s++, f); }
-}
-
-static void
-p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
-{
- int x = len * mb_len;
-
- fprintf(f, ":%d:", len);
- while (x-- > 0) { fputc(*s++, f); }
-}
-
-static void
-p_rel_addr(FILE* f, RelAddrType rel_addr, Operation* p, Operation* start)
-{
- RelAddrType curr = (RelAddrType )(p - start);
-
- fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr);
-}
-
-static int
-bitset_on_num(BitSetRef bs)
-{
- int i, n;
-
- n = 0;
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (BITSET_AT(bs, i)) n++;
- }
-
- return n;
-}
-
-static void
-print_compiled_byte_code(FILE* f, regex_t* reg, int index,
- Operation* start, OnigEncoding enc)
-{
- int i, n;
- RelAddrType addr;
- LengthType len;
- MemNumType mem;
- OnigCodePoint code;
- ModeType mode;
- UChar *q;
- Operation* p;
- enum OpCode opcode;
-
- p = reg->ops + index;
-
-#ifdef USE_DIRECT_THREADED_CODE
- opcode = reg->ocs[index];
-#else
- opcode = p->opcode;
-#endif
-
- fprintf(f, "%s", op2name(opcode));
- switch (opcode) {
- case OP_EXACT1:
- p_string(f, 1, p->exact.s); break;
- case OP_EXACT2:
- p_string(f, 2, p->exact.s); break;
- case OP_EXACT3:
- p_string(f, 3, p->exact.s); break;
- case OP_EXACT4:
- p_string(f, 4, p->exact.s); break;
- case OP_EXACT5:
- p_string(f, 5, p->exact.s); break;
- case OP_EXACTN:
- len = p->exact_n.n;
- p_string(f, len, p->exact_n.s); break;
- case OP_EXACTMB2N1:
- p_string(f, 2, p->exact.s); break;
- case OP_EXACTMB2N2:
- p_string(f, 4, p->exact.s); break;
- case OP_EXACTMB2N3:
- p_string(f, 3, p->exact.s); break;
- case OP_EXACTMB2N:
- len = p->exact_n.n;
- p_len_string(f, len, 2, p->exact_n.s); break;
- case OP_EXACTMB3N:
- len = p->exact_n.n;
- p_len_string(f, len, 3, p->exact_n.s); break;
- case OP_EXACTMBN:
- {
- int mb_len;
-
- mb_len = p->exact_len_n.len;
- len = p->exact_len_n.n;
- q = p->exact_len_n.s;
- fprintf(f, ":%d:%d:", mb_len, len);
- n = len * mb_len;
- while (n-- > 0) { fputc(*q++, f); }
- }
- break;
- case OP_EXACT1_IC:
- len = enclen(enc, p->exact.s);
- p_string(f, len, p->exact.s);
- break;
- case OP_EXACTN_IC:
- len = p->exact_n.n;
- p_len_string(f, len, 1, p->exact_n.s);
- break;
-
- case OP_CCLASS:
- case OP_CCLASS_NOT:
- n = bitset_on_num(p->cclass.bsp);
- fprintf(f, ":%d", n);
- break;
- case OP_CCLASS_MB:
- case OP_CCLASS_MB_NOT:
- {
- OnigCodePoint ncode;
- OnigCodePoint* codes;
-
- codes = (OnigCodePoint* )p->cclass_mb.mb;
- GET_CODE_POINT(ncode, codes);
- codes++;
- GET_CODE_POINT(code, codes);
- fprintf(f, ":%u:%u", code, ncode);
- }
- break;
- case OP_CCLASS_MIX:
- case OP_CCLASS_MIX_NOT:
- {
- OnigCodePoint ncode;
- OnigCodePoint* codes;
-
- codes = (OnigCodePoint* )p->cclass_mix.mb;
- n = bitset_on_num(p->cclass_mix.bsp);
-
- GET_CODE_POINT(ncode, codes);
- codes++;
- GET_CODE_POINT(code, codes);
- fprintf(f, ":%d:%u:%u", n, code, ncode);
- }
- break;
-
- case OP_ANYCHAR_STAR_PEEK_NEXT:
- case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
- p_string(f, 1, &(p->anychar_star_peek_next.c));
- break;
-
- case OP_WORD_BOUNDARY:
- case OP_NO_WORD_BOUNDARY:
- case OP_WORD_BEGIN:
- case OP_WORD_END:
- mode = p->word_boundary.mode;
- fprintf(f, ":%d", mode);
- break;
-
- case OP_BACKREF_N:
- case OP_BACKREF_N_IC:
- mem = p->backref_n.n1;
- fprintf(f, ":%d", mem);
- break;
- case OP_BACKREF_MULTI_IC:
- case OP_BACKREF_MULTI:
- case OP_BACKREF_CHECK:
- fputs(" ", f);
- n = p->backref_general.num;
- for (i = 0; i < n; i++) {
- mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
- if (i > 0) fputs(", ", f);
- fprintf(f, "%d", mem);
- }
- break;
- case OP_BACKREF_WITH_LEVEL:
- case OP_BACKREF_WITH_LEVEL_IC:
- case OP_BACKREF_CHECK_WITH_LEVEL:
- {
- LengthType level;
-
- level = p->backref_general.nest_level;
- fprintf(f, ":%d", level);
- fputs(" ", f);
- n = p->backref_general.num;
- for (i = 0; i < n; i++) {
- mem = (n == 1) ? p->backref_general.n1 : p->backref_general.ns[i];
- if (i > 0) fputs(", ", f);
- fprintf(f, "%d", mem);
- }
- }
- break;
-
- case OP_MEMORY_START:
- case OP_MEMORY_START_PUSH:
- mem = p->memory_start.num;
- fprintf(f, ":%d", mem);
- break;
- case OP_MEMORY_END_PUSH:
- case OP_MEMORY_END_PUSH_REC:
- case OP_MEMORY_END:
- case OP_MEMORY_END_REC:
- mem = p->memory_end.num;
- fprintf(f, ":%d", mem);
- break;
-
- case OP_JUMP:
- addr = p->jump.addr;
- fputc(':', f);
- p_rel_addr(f, addr, p, start);
- break;
-
- case OP_PUSH:
- case OP_PUSH_SUPER:
- addr = p->push.addr;
- fputc(':', f);
- p_rel_addr(f, addr, p, start);
- break;
-
-#ifdef USE_OP_PUSH_OR_JUMP_EXACT
- case OP_PUSH_OR_JUMP_EXACT1:
- addr = p->push_or_jump_exact1.addr;
- fputc(':', f);
- p_rel_addr(f, addr, p, start);
- p_string(f, 1, &(p->push_or_jump_exact1.c));
- break;
-#endif
-
- case OP_PUSH_IF_PEEK_NEXT:
- addr = p->push_if_peek_next.addr;
- fputc(':', f);
- p_rel_addr(f, addr, p, start);
- p_string(f, 1, &(p->push_if_peek_next.c));
- break;
-
- case OP_REPEAT:
- case OP_REPEAT_NG:
- mem = p->repeat.id;
- addr = p->repeat.addr;
- fprintf(f, ":%d:", mem);
- p_rel_addr(f, addr, p, start);
- break;
-
- case OP_REPEAT_INC:
- case OP_REPEAT_INC_NG:
- case OP_REPEAT_INC_SG:
- case OP_REPEAT_INC_NG_SG:
- mem = p->repeat.id;
- fprintf(f, ":%d", mem);
- break;
-
- case OP_EMPTY_CHECK_START:
- mem = p->empty_check_start.mem;
- fprintf(f, ":%d", mem);
- break;
- case OP_EMPTY_CHECK_END:
- case OP_EMPTY_CHECK_END_MEMST:
- case OP_EMPTY_CHECK_END_MEMST_PUSH:
- mem = p->empty_check_end.mem;
- fprintf(f, ":%d", mem);
- break;
-
- case OP_PREC_READ_NOT_START:
- addr = p->prec_read_not_start.addr;
- fputc(':', f);
- p_rel_addr(f, addr, p, start);
- break;
-
- case OP_LOOK_BEHIND:
- len = p->look_behind.len;
- fprintf(f, ":%d", len);
- break;
-
- case OP_LOOK_BEHIND_NOT_START:
- addr = p->look_behind_not_start.addr;
- len = p->look_behind_not_start.len;
- fprintf(f, ":%d:", len);
- p_rel_addr(f, addr, p, start);
- break;
-
- case OP_CALL:
- addr = p->call.addr;
- fprintf(f, ":{/%d}", addr);
- break;
-
- case OP_PUSH_SAVE_VAL:
- {
- SaveType type;
-
- type = p->push_save_val.type;
- mem = p->push_save_val.id;
- fprintf(f, ":%d:%d", type, mem);
- }
- break;
-
- case OP_UPDATE_VAR:
- {
- UpdateVarType type;
-
- type = p->update_var.type;
- mem = p->update_var.id;
- fprintf(f, ":%d:%d", type, mem);
- }
- break;
-
-#ifdef USE_CALLOUT
- case OP_CALLOUT_CONTENTS:
- mem = p->callout_contents.num;
- fprintf(f, ":%d", mem);
- break;
-
- case OP_CALLOUT_NAME:
- {
- int id;
-
- id = p->callout_name.id;
- mem = p->callout_name.num;
- fprintf(f, ":%d:%d", id, mem);
- }
- break;
-#endif
-
- case OP_TEXT_SEGMENT_BOUNDARY:
- if (p->text_segment_boundary.not != 0)
- fprintf(f, ":not");
- break;
-
- case OP_FINISH:
- case OP_END:
- case OP_ANYCHAR:
- case OP_ANYCHAR_ML:
- case OP_ANYCHAR_STAR:
- case OP_ANYCHAR_ML_STAR:
- case OP_WORD:
- case OP_WORD_ASCII:
- case OP_NO_WORD:
- case OP_NO_WORD_ASCII:
- case OP_BEGIN_BUF:
- case OP_END_BUF:
- case OP_BEGIN_LINE:
- case OP_END_LINE:
- case OP_SEMI_END_BUF:
- case OP_BEGIN_POSITION:
- case OP_BACKREF1:
- case OP_BACKREF2:
- case OP_FAIL:
- case OP_POP_OUT:
- case OP_PREC_READ_START:
- case OP_PREC_READ_END:
- case OP_PREC_READ_NOT_END:
- case OP_ATOMIC_START:
- case OP_ATOMIC_END:
- case OP_LOOK_BEHIND_NOT_END:
- case OP_RETURN:
- break;
-
- default:
- fprintf(stderr, "print_compiled_byte_code: undefined code %d\n", opcode);
- break;
- }
-}
-#endif /* ONIG_DEBUG */
-
-#ifdef ONIG_DEBUG_COMPILE
-extern void
-onig_print_compiled_byte_code_list(FILE* f, regex_t* reg)
-{
- Operation* bp;
- Operation* start = reg->ops;
- Operation* end = reg->ops + reg->ops_used;
-
- fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n",
- reg->bt_mem_start, reg->bt_mem_end);
- fprintf(f, "code-length: %d\n", reg->ops_used);
-
- bp = start;
- while (bp < end) {
- int pos = bp - start;
-
- fprintf(f, "%4d: ", pos);
- print_compiled_byte_code(f, reg, pos, start, reg->enc);
- fprintf(f, "\n");
- bp++;
- }
- fprintf(f, "\n");
-}
-#endif
-
-
-#ifdef USE_CAPTURE_HISTORY
-static void history_tree_free(OnigCaptureTreeNode* node);
-
-static void
-history_tree_clear(OnigCaptureTreeNode* node)
-{
- int i;
-
- if (IS_NULL(node)) return ;
-
- for (i = 0; i < node->num_childs; i++) {
- if (IS_NOT_NULL(node->childs[i])) {
- history_tree_free(node->childs[i]);
- }
- }
- for (i = 0; i < node->allocated; i++) {
- node->childs[i] = (OnigCaptureTreeNode* )0;
- }
- node->num_childs = 0;
- node->beg = ONIG_REGION_NOTPOS;
- node->end = ONIG_REGION_NOTPOS;
- node->group = -1;
-}
-
-static void
-history_tree_free(OnigCaptureTreeNode* node)
-{
- history_tree_clear(node);
- if (IS_NOT_NULL(node->childs)) xfree(node->childs);
-
- xfree(node);
-}
-
-static void
-history_root_free(OnigRegion* r)
-{
- if (IS_NULL(r->history_root)) return ;
-
- history_tree_free(r->history_root);
- r->history_root = (OnigCaptureTreeNode* )0;
-}
-
-static OnigCaptureTreeNode*
-history_node_new(void)
-{
- OnigCaptureTreeNode* node;
-
- node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
- CHECK_NULL_RETURN(node);
-
- node->childs = (OnigCaptureTreeNode** )0;
- node->allocated = 0;
- node->num_childs = 0;
- node->group = -1;
- node->beg = ONIG_REGION_NOTPOS;
- node->end = ONIG_REGION_NOTPOS;
-
- return node;
-}
-
-static int
-history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
-{
-#define HISTORY_TREE_INIT_ALLOC_SIZE 8
-
- if (parent->num_childs >= parent->allocated) {
- int n, i;
-
- if (IS_NULL(parent->childs)) {
- n = HISTORY_TREE_INIT_ALLOC_SIZE;
- parent->childs =
- (OnigCaptureTreeNode** )xmalloc(sizeof(parent->childs[0]) * n);
- }
- else {
- n = parent->allocated * 2;
- parent->childs =
- (OnigCaptureTreeNode** )xrealloc(parent->childs,
- sizeof(parent->childs[0]) * n,
- sizeof(parent->childs[0]) * parent->allocated);
- }
- CHECK_NULL_RETURN_MEMERR(parent->childs);
- for (i = parent->allocated; i < n; i++) {
- parent->childs[i] = (OnigCaptureTreeNode* )0;
- }
- parent->allocated = n;
- }
-
- parent->childs[parent->num_childs] = child;
- parent->num_childs++;
- return 0;
-}
-
-static OnigCaptureTreeNode*
-history_tree_clone(OnigCaptureTreeNode* node)
-{
- int i;
- OnigCaptureTreeNode *clone, *child;
-
- clone = history_node_new();
- CHECK_NULL_RETURN(clone);
-
- clone->beg = node->beg;
- clone->end = node->end;
- for (i = 0; i < node->num_childs; i++) {
- child = history_tree_clone(node->childs[i]);
- if (IS_NULL(child)) {
- history_tree_free(clone);
- return (OnigCaptureTreeNode* )0;
- }
- history_tree_add_child(clone, child);
- }
-
- return clone;
-}
-
-extern OnigCaptureTreeNode*
-onig_get_capture_tree(OnigRegion* region)
-{
- return region->history_root;
-}
-#endif /* USE_CAPTURE_HISTORY */
-
-extern void
-onig_region_clear(OnigRegion* region)
-{
- int i;
-
- for (i = 0; i < region->num_regs; i++) {
- region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
- }
-#ifdef USE_CAPTURE_HISTORY
- history_root_free(region);
-#endif
-}
-
-extern int
-onig_region_resize(OnigRegion* region, int n)
-{
- region->num_regs = n;
-
- if (n < ONIG_NREGION)
- n = ONIG_NREGION;
-
- if (region->allocated == 0) {
- region->beg = (int* )xmalloc(n * sizeof(int));
- region->end = (int* )xmalloc(n * sizeof(int));
-
- if (region->beg == 0 || region->end == 0)
- return ONIGERR_MEMORY;
-
- region->allocated = n;
- }
- else if (region->allocated < n) {
- region->beg = (int* )xrealloc(region->beg, n * sizeof(int), region->allocated * sizeof(int));
- region->end = (int* )xrealloc(region->end, n * sizeof(int), region->allocated * sizeof(int));
-
- if (region->beg == 0 || region->end == 0)
- return ONIGERR_MEMORY;
-
- region->allocated = n;
- }
-
- return 0;
-}
-
-static int
-onig_region_resize_clear(OnigRegion* region, int n)
-{
- int r;
-
- r = onig_region_resize(region, n);
- if (r != 0) return r;
- onig_region_clear(region);
- return 0;
-}
-
-extern int
-onig_region_set(OnigRegion* region, int at, int beg, int end)
-{
- if (at < 0) return ONIGERR_INVALID_ARGUMENT;
-
- if (at >= region->allocated) {
- int r = onig_region_resize(region, at + 1);
- if (r < 0) return r;
- }
-
- region->beg[at] = beg;
- region->end[at] = end;
- return 0;
-}
-
-extern void
-onig_region_init(OnigRegion* region)
-{
- region->num_regs = 0;
- region->allocated = 0;
- region->beg = (int* )0;
- region->end = (int* )0;
- region->history_root = (OnigCaptureTreeNode* )0;
-}
-
-extern OnigRegion*
-onig_region_new(void)
-{
- OnigRegion* r;
-
- r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
- CHECK_NULL_RETURN(r);
- onig_region_init(r);
- return r;
-}
-
-extern void
-onig_region_free(OnigRegion* r, int free_self)
-{
- if (r != 0) {
- if (r->allocated > 0) {
- if (r->beg) xfree(r->beg);
- if (r->end) xfree(r->end);
- r->allocated = 0;
- }
-#ifdef USE_CAPTURE_HISTORY
- history_root_free(r);
-#endif
- if (free_self) xfree(r);
- }
-}
-
-extern void
-onig_region_copy(OnigRegion* to, OnigRegion* from)
-{
-#define RREGC_SIZE (sizeof(int) * from->num_regs)
- int i;
-
- if (to == from) return;
-
- if (to->allocated == 0) {
- if (from->num_regs > 0) {
- to->beg = (int* )xmalloc(RREGC_SIZE);
- if (IS_NULL(to->beg)) return;
- to->end = (int* )xmalloc(RREGC_SIZE);
- if (IS_NULL(to->end)) return;
- to->allocated = from->num_regs;
- }
- }
- else if (to->allocated < from->num_regs) {
- to->beg = (int* )xrealloc(to->beg, RREGC_SIZE, sizeof(int) * to->allocated);
- if (IS_NULL(to->beg)) return;
- to->end = (int* )xrealloc(to->end, RREGC_SIZE, sizeof(int) * to->allocated);
- if (IS_NULL(to->end)) return;
- to->allocated = from->num_regs;
- }
-
- for (i = 0; i < from->num_regs; i++) {
- to->beg[i] = from->beg[i];
- to->end[i] = from->end[i];
- }
- to->num_regs = from->num_regs;
-
-#ifdef USE_CAPTURE_HISTORY
- history_root_free(to);
-
- if (IS_NOT_NULL(from->history_root)) {
- to->history_root = history_tree_clone(from->history_root);
- }
-#endif
-}
-
-#ifdef USE_CALLOUT
-#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \
- args.in = (ain);\
- args.name_id = (aname_id);\
- args.num = anum;\
- args.regex = reg;\
- args.string = str;\
- args.string_end = end;\
- args.start = sstart;\
- args.right_range = right_range;\
- args.current = s;\
- args.retry_in_match_counter = retry_in_match_counter;\
- args.msa = msa;\
- args.stk_base = stk_base;\
- args.stk = stk;\
- args.mem_start_stk = mem_start_stk;\
- args.mem_end_stk = mem_end_stk;\
- result = (func)(&args, user);\
-} while (0)
-
-#define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\
- int result;\
- OnigCalloutArgs args;\
- CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\
- switch (result) {\
- case ONIG_CALLOUT_FAIL:\
- case ONIG_CALLOUT_SUCCESS:\
- break;\
- default:\
- if (result > 0) {\
- result = ONIGERR_INVALID_ARGUMENT;\
- }\
- best_len = result;\
- goto finish;\
- break;\
- }\
-} while(0)
-#endif
-
-
-/** stack **/
-#define INVALID_STACK_INDEX -1
-
-#define STK_ALT_FLAG 0x0001
-
-/* stack type */
-/* used by normal-POP */
-#define STK_SUPER_ALT STK_ALT_FLAG
-#define STK_ALT (0x0002 | STK_ALT_FLAG)
-#define STK_ALT_PREC_READ_NOT (0x0004 | STK_ALT_FLAG)
-#define STK_ALT_LOOK_BEHIND_NOT (0x0006 | STK_ALT_FLAG)
-
-/* handled by normal-POP */
-#define STK_MEM_START 0x0010
-#define STK_MEM_END 0x8030
-#define STK_REPEAT_INC 0x0050
-#ifdef USE_CALLOUT
-#define STK_CALLOUT 0x0070
-#endif
-
-/* avoided by normal-POP */
-#define STK_VOID 0x0000 /* for fill a blank */
-#define STK_EMPTY_CHECK_START 0x3000
-#define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */
-#define STK_MEM_END_MARK 0x8100
-#define STK_TO_VOID_START 0x1200 /* mark for "(?>...)" */
-#define STK_REPEAT 0x0300
-#define STK_CALL_FRAME 0x0400
-#define STK_RETURN 0x0500
-#define STK_SAVE_VAL 0x0600
-#define STK_PREC_READ_START 0x0700
-#define STK_PREC_READ_END 0x0800
-
-/* stack type check mask */
-#define STK_MASK_POP_USED STK_ALT_FLAG
-#define STK_MASK_POP_HANDLED 0x0010
-#define STK_MASK_POP_HANDLED_TIL (STK_MASK_POP_HANDLED | 0x0004)
-#define STK_MASK_TO_VOID_TARGET 0x100e
-#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
-
-typedef intptr_t StackIndex;
-
-typedef struct _StackType {
- unsigned int type;
- int zid;
- union {
- struct {
- Operation* pcode; /* byte code position */
- UChar* pstr; /* string position */
- UChar* pstr_prev; /* previous char position of pstr */
- } state;
- struct {
- int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
- Operation* pcode; /* byte code position (head of repeated target) */
- } repeat;
- struct {
- StackIndex si; /* index of stack */
- } repeat_inc;
- struct {
- UChar *pstr; /* start/end position */
- /* Following information is set, if this stack type is MEM-START */
- StackIndex prev_start; /* prev. info (for backtrack "(...)*" ) */
- StackIndex prev_end; /* prev. info (for backtrack "(...)*" ) */
- } mem;
- struct {
- UChar *pstr; /* start position */
- } empty_check;
-#ifdef USE_CALL
- struct {
- Operation *ret_addr; /* byte code position */
- UChar *pstr; /* string position */
- } call_frame;
-#endif
- struct {
- enum SaveType type;
- UChar* v;
- UChar* v2;
- } val;
-#ifdef USE_CALLOUT
- struct {
- int num;
- OnigCalloutFunc func;
- } callout;
-#endif
- } u;
-} StackType;
-
-#ifdef USE_CALLOUT
-
-struct OnigCalloutArgsStruct {
- OnigCalloutIn in;
- int name_id; /* name id or ONIG_NON_NAME_ID */
- int num;
- OnigRegex regex;
- const OnigUChar* string;
- const OnigUChar* string_end;
- const OnigUChar* start;
- const OnigUChar* right_range;
- const OnigUChar* current; /* current matching position */
- unsigned long retry_in_match_counter;
-
- /* invisible to users */
- MatchArg* msa;
- StackType* stk_base;
- StackType* stk;
- StackIndex* mem_start_stk;
- StackIndex* mem_end_stk;
-};
-
-#endif
-
-
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
- (msa).stack_p = (void* )0;\
- (msa).options = (arg_option);\
- (msa).region = (arg_region);\
- (msa).start = (arg_start);\
- (msa).match_stack_limit = (mp)->match_stack_limit;\
- (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
- (msa).mp = mp;\
- (msa).best_len = ONIG_MISMATCH;\
- (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
-} while(0)
-#else
-#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
- (msa).stack_p = (void* )0;\
- (msa).options = (arg_option);\
- (msa).region = (arg_region);\
- (msa).start = (arg_start);\
- (msa).match_stack_limit = (mp)->match_stack_limit;\
- (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
- (msa).mp = mp;\
- (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
-} while(0)
-#endif
-
-#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
-
-
-#define ALLOCA_PTR_NUM_LIMIT 50
-
-#define STACK_INIT(stack_num) do {\
- if (msa->stack_p) {\
- is_alloca = 0;\
- alloc_base = msa->stack_p;\
- stk_base = (StackType* )(alloc_base\
- + (sizeof(StackIndex) * msa->ptr_num));\
- stk = stk_base;\
- stk_end = stk_base + msa->stack_n;\
- }\
- else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
- is_alloca = 0;\
- alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
- + sizeof(StackType) * (stack_num));\
- CHECK_NULL_RETURN_MEMERR(alloc_base);\
- stk_base = (StackType* )(alloc_base\
- + (sizeof(StackIndex) * msa->ptr_num));\
- stk = stk_base;\
- stk_end = stk_base + (stack_num);\
- }\
- else {\
- is_alloca = 1;\
- alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
- + sizeof(StackType) * (stack_num));\
- CHECK_NULL_RETURN_MEMERR(alloc_base);\
- stk_base = (StackType* )(alloc_base\
- + (sizeof(StackIndex) * msa->ptr_num));\
- stk = stk_base;\
- stk_end = stk_base + (stack_num);\
- }\
-} while(0);
-
-
-#define STACK_SAVE do{\
- msa->stack_n = (int )(stk_end - stk_base);\
- if (is_alloca != 0) {\
- size_t size = sizeof(StackIndex) * msa->ptr_num \
- + sizeof(StackType) * msa->stack_n;\
- msa->stack_p = xmalloc(size);\
- CHECK_NULL_RETURN_MEMERR(msa->stack_p);\
- xmemcpy(msa->stack_p, alloc_base, size);\
- }\
- else {\
- msa->stack_p = alloc_base;\
- };\
-} while(0)
-
-#define UPDATE_FOR_STACK_REALLOC do{\
- repeat_stk = (StackIndex* )alloc_base;\
- mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
- mem_end_stk = mem_start_stk + num_mem + 1;\
-} while(0)
-
-static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;
-
-extern unsigned int
-onig_get_match_stack_limit_size(void)
-{
- return MatchStackLimit;
-}
-
-extern int
-onig_set_match_stack_limit_size(unsigned int size)
-{
- MatchStackLimit = size;
- return 0;
-}
-
-#ifdef USE_RETRY_LIMIT_IN_MATCH
-
-static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH;
-
-#define CHECK_RETRY_LIMIT_IN_MATCH do {\
- if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\
-} while (0)
-
-#else
-
-#define CHECK_RETRY_LIMIT_IN_MATCH
-
-#endif /* USE_RETRY_LIMIT_IN_MATCH */
-
-extern unsigned long
-onig_get_retry_limit_in_match(void)
-{
-#ifdef USE_RETRY_LIMIT_IN_MATCH
- return RetryLimitInMatch;
-#else
- /* return ONIG_NO_SUPPORT_CONFIG; */
- return 0;
-#endif
-}
-
-extern int
-onig_set_retry_limit_in_match(unsigned long size)
-{
-#ifdef USE_RETRY_LIMIT_IN_MATCH
- RetryLimitInMatch = size;
- return 0;
-#else
- return ONIG_NO_SUPPORT_CONFIG;
-#endif
-}
-
-#ifdef USE_CALLOUT
-static OnigCalloutFunc DefaultProgressCallout;
-static OnigCalloutFunc DefaultRetractionCallout;
-#endif
-
-extern OnigMatchParam*
-onig_new_match_param(void)
-{
- OnigMatchParam* p;
-
- p = (OnigMatchParam* )xmalloc(sizeof(*p));
- if (IS_NOT_NULL(p)) {
- onig_initialize_match_param(p);
- }
-
- return p;
-}
-
-extern void
-onig_free_match_param_content(OnigMatchParam* p)
-{
-#ifdef USE_CALLOUT
- if (IS_NOT_NULL(p->callout_data)) {
- xfree(p->callout_data);
- p->callout_data = 0;
- }
-#endif
-}
-
-extern void
-onig_free_match_param(OnigMatchParam* p)
-{
- if (IS_NOT_NULL(p)) {
- onig_free_match_param_content(p);
- xfree(p);
- }
-}
-
-extern int
-onig_initialize_match_param(OnigMatchParam* mp)
-{
- mp->match_stack_limit = MatchStackLimit;
-#ifdef USE_RETRY_LIMIT_IN_MATCH
- mp->retry_limit_in_match = RetryLimitInMatch;
-#endif
-
-#ifdef USE_CALLOUT
- mp->progress_callout_of_contents = DefaultProgressCallout;
- mp->retraction_callout_of_contents = DefaultRetractionCallout;
- mp->match_at_call_counter = 0;
- mp->callout_user_data = 0;
- mp->callout_data = 0;
- mp->callout_data_alloc_num = 0;
-#endif
-
- return ONIG_NORMAL;
-}
-
-#ifdef USE_CALLOUT
-
-static int
-adjust_match_param(regex_t* reg, OnigMatchParam* mp)
-{
- RegexExt* ext = reg->extp;
-
- mp->match_at_call_counter = 0;
-
- if (IS_NULL(ext) || ext->callout_num == 0) return ONIG_NORMAL;
-
- if (ext->callout_num > mp->callout_data_alloc_num) {
- CalloutData* d;
- size_t n = ext->callout_num * sizeof(*d);
- if (IS_NOT_NULL(mp->callout_data))
- d = (CalloutData* )xrealloc(mp->callout_data, n, mp->callout_data_alloc_num * sizeof(*d));
- else
- d = (CalloutData* )xmalloc(n);
- CHECK_NULL_RETURN_MEMERR(d);
-
- mp->callout_data = d;
- mp->callout_data_alloc_num = ext->callout_num;
- }
-
- xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData));
- return ONIG_NORMAL;
-}
-
-#define ADJUST_MATCH_PARAM(reg, mp) \
- r = adjust_match_param(reg, mp);\
- if (r != ONIG_NORMAL) return r;
-
-#define CALLOUT_DATA_AT_NUM(mp, num) ((mp)->callout_data + ((num) - 1))
-
-extern int
-onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args)
-{
- OnigMatchParam* mp;
- int num;
- CalloutData* d;
-
- mp = args->msa->mp;
- num = args->num;
-
- d = CALLOUT_DATA_AT_NUM(mp, num);
- if (d->last_match_at_call_counter != mp->match_at_call_counter) {
- xmemset(d, 0, sizeof(*d));
- d->last_match_at_call_counter = mp->match_at_call_counter;
- return d->last_match_at_call_counter;
- }
-
- return 0;
-}
-
-extern int
-onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp,
- int callout_num, int slot,
- OnigType* type, OnigValue* val)
-{
- OnigType t;
- CalloutData* d;
-
- if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
-
- d = CALLOUT_DATA_AT_NUM(mp, callout_num);
- t = d->slot[slot].type;
- if (IS_NOT_NULL(type)) *type = t;
- if (IS_NOT_NULL(val)) *val = d->slot[slot].val;
- return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
-}
-
-extern int
-onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args,
- int slot, OnigType* type,
- OnigValue* val)
-{
- return onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp,
- args->num, slot, type, val);
-}
-
-extern int
-onig_get_callout_data(regex_t* reg, OnigMatchParam* mp,
- int callout_num, int slot,
- OnigType* type, OnigValue* val)
-{
- OnigType t;
- CalloutData* d;
-
- if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
-
- d = CALLOUT_DATA_AT_NUM(mp, callout_num);
- if (d->last_match_at_call_counter != mp->match_at_call_counter) {
- xmemset(d, 0, sizeof(*d));
- d->last_match_at_call_counter = mp->match_at_call_counter;
- }
-
- t = d->slot[slot].type;
- if (IS_NOT_NULL(type)) *type = t;
- if (IS_NOT_NULL(val)) *val = d->slot[slot].val;
- return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL);
-}
-
-extern int
-onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
- const UChar* tag, const UChar* tag_end, int slot,
- OnigType* type, OnigValue* val)
-{
- int num;
-
- num = onig_get_callout_num_by_tag(reg, tag, tag_end);
- if (num < 0) return num;
- if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
-
- return onig_get_callout_data(reg, mp, num, slot, type, val);
-}
-
-extern int
-onig_get_callout_data_by_callout_args(OnigCalloutArgs* args,
- int callout_num, int slot,
- OnigType* type, OnigValue* val)
-{
- return onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot,
- type, val);
-}
-
-extern int
-onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args,
- int slot, OnigType* type, OnigValue* val)
-{
- return onig_get_callout_data(args->regex, args->msa->mp, args->num, slot,
- type, val);
-}
-
-extern int
-onig_set_callout_data(regex_t* reg, OnigMatchParam* mp,
- int callout_num, int slot,
- OnigType type, OnigValue* val)
-{
- CalloutData* d;
-
- if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT;
-
- d = CALLOUT_DATA_AT_NUM(mp, callout_num);
- d->slot[slot].type = type;
- d->slot[slot].val = *val;
- d->last_match_at_call_counter = mp->match_at_call_counter;
-
- return ONIG_NORMAL;
-}
-
-extern int
-onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp,
- const UChar* tag, const UChar* tag_end, int slot,
- OnigType type, OnigValue* val)
-{
- int num;
-
- num = onig_get_callout_num_by_tag(reg, tag, tag_end);
- if (num < 0) return num;
- if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
-
- return onig_set_callout_data(reg, mp, num, slot, type, val);
-}
-
-extern int
-onig_set_callout_data_by_callout_args(OnigCalloutArgs* args,
- int callout_num, int slot,
- OnigType type, OnigValue* val)
-{
- return onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot,
- type, val);
-}
-
-extern int
-onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args,
- int slot, OnigType type, OnigValue* val)
-{
- return onig_set_callout_data(args->regex, args->msa->mp, args->num, slot,
- type, val);
-}
-
-#else
-#define ADJUST_MATCH_PARAM(reg, mp)
-#endif /* USE_CALLOUT */
-
-
-static int
-stack_double(int is_alloca, char** arg_alloc_base,
- StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk,
- MatchArg* msa)
-{
- unsigned int n;
- int used;
- size_t size;
- size_t new_size;
- char* alloc_base;
- char* new_alloc_base;
- StackType *stk_base, *stk_end, *stk;
-
- alloc_base = *arg_alloc_base;
- stk_base = *arg_stk_base;
- stk_end = *arg_stk_end;
- stk = *arg_stk;
-
- n = (unsigned int )(stk_end - stk_base);
- size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
- n *= 2;
- new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
- if (is_alloca != 0) {
- new_alloc_base = (char* )xmalloc(new_size);
- if (IS_NULL(new_alloc_base)) {
- STACK_SAVE;
- return ONIGERR_MEMORY;
- }
- xmemcpy(new_alloc_base, alloc_base, size);
- }
- else {
- if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) {
- if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit)
- return ONIGERR_MATCH_STACK_LIMIT_OVER;
- else
- n = msa->match_stack_limit;
- }
- new_alloc_base = (char* )xrealloc(alloc_base, new_size, size);
- if (IS_NULL(new_alloc_base)) {
- STACK_SAVE;
- return ONIGERR_MEMORY;
- }
- }
-
- alloc_base = new_alloc_base;
- used = (int )(stk - stk_base);
- *arg_alloc_base = alloc_base;
- *arg_stk_base = (StackType* )(alloc_base
- + (sizeof(StackIndex) * msa->ptr_num));
- *arg_stk = *arg_stk_base + used;
- *arg_stk_end = *arg_stk_base + n;
- return 0;
-}
-
-#define STACK_ENSURE(n) do {\
- if ((int )(stk_end - stk) < (n)) {\
- int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\
- if (r != 0) { STACK_SAVE; return r; } \
- is_alloca = 0;\
- UPDATE_FOR_STACK_REALLOC;\
- }\
-} while(0)
-
-#define STACK_AT(index) (stk_base + (index))
-#define GET_STACK_INDEX(stk) ((stk) - stk_base)
-
-#define STACK_PUSH_TYPE(stack_type) do {\
- STACK_ENSURE(1);\
- stk->type = (stack_type);\
- STACK_INC;\
-} while(0)
-
-#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
-
-#define STACK_PUSH(stack_type,pat,s,sprev) do {\
- STACK_ENSURE(1);\
- stk->type = (stack_type);\
- stk->u.state.pcode = (pat);\
- stk->u.state.pstr = (s);\
- stk->u.state.pstr_prev = (sprev);\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_ENSURED(stack_type,pat) do {\
- stk->type = (stack_type);\
- stk->u.state.pcode = (pat);\
- STACK_INC;\
-} while(0)
-
-#ifdef ONIG_DEBUG_MATCH
-#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
- stk->type = (stack_type);\
- stk->u.state.pcode = (pat);\
- stk->u.state.pstr = s;\
- stk->u.state.pstr_prev = sprev;\
- STACK_INC;\
-} while (0)
-#else
-#define STACK_PUSH_BOTTOM(stack_type,pat) do {\
- stk->type = (stack_type);\
- stk->u.state.pcode = (pat);\
- STACK_INC;\
-} while (0)
-#endif
-
-#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
-#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
-#define STACK_PUSH_PREC_READ_START(s,sprev) \
- STACK_PUSH(STK_PREC_READ_START,(Operation* )0,s,sprev)
-#define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \
- STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev)
-#define STACK_PUSH_TO_VOID_START STACK_PUSH_TYPE(STK_TO_VOID_START)
-#define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \
- STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev)
-
-#define STACK_PUSH_REPEAT(sid, pat) do {\
- STACK_ENSURE(1);\
- stk->type = STK_REPEAT;\
- stk->zid = (sid);\
- stk->u.repeat.pcode = (pat);\
- stk->u.repeat.count = 0;\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_REPEAT_INC(sindex) do {\
- STACK_ENSURE(1);\
- stk->type = STK_REPEAT_INC;\
- stk->u.repeat_inc.si = (sindex);\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_MEM_START(mnum, s) do {\
- STACK_ENSURE(1);\
- stk->type = STK_MEM_START;\
- stk->zid = (mnum);\
- stk->u.mem.pstr = (s);\
- stk->u.mem.prev_start = mem_start_stk[mnum];\
- stk->u.mem.prev_end = mem_end_stk[mnum];\
- mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
- mem_end_stk[mnum] = INVALID_STACK_INDEX;\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_MEM_END(mnum, s) do {\
- STACK_ENSURE(1);\
- stk->type = STK_MEM_END;\
- stk->zid = (mnum);\
- stk->u.mem.pstr = (s);\
- stk->u.mem.prev_start = mem_start_stk[mnum];\
- stk->u.mem.prev_end = mem_end_stk[mnum];\
- mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_MEM_END_MARK(mnum) do {\
- STACK_ENSURE(1);\
- stk->type = STK_MEM_END_MARK;\
- stk->zid = (mnum);\
- STACK_INC;\
-} while(0)
-
-#define STACK_GET_MEM_START(mnum, k) do {\
- int level = 0;\
- k = stk;\
- while (k > stk_base) {\
- k--;\
- if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
- && k->zid == (mnum)) {\
- level++;\
- }\
- else if (k->type == STK_MEM_START && k->zid == (mnum)) {\
- if (level == 0) break;\
- level--;\
- }\
- }\
-} while(0)
-
-#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
- int level = 0;\
- while (k < stk) {\
- if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
- if (level == 0) (start) = k->u.mem.pstr;\
- level++;\
- }\
- else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
- level--;\
- if (level == 0) {\
- (end) = k->u.mem.pstr;\
- break;\
- }\
- }\
- k++;\
- }\
-} while(0)
-
-#define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\
- STACK_ENSURE(1);\
- stk->type = STK_EMPTY_CHECK_START;\
- stk->zid = (cnum);\
- stk->u.empty_check.pstr = (s);\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\
- STACK_ENSURE(1);\
- stk->type = STK_EMPTY_CHECK_END;\
- stk->zid = (cnum);\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_CALL_FRAME(pat) do {\
- STACK_ENSURE(1);\
- stk->type = STK_CALL_FRAME;\
- stk->u.call_frame.ret_addr = (pat);\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_RETURN do {\
- STACK_ENSURE(1);\
- stk->type = STK_RETURN;\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\
- STACK_ENSURE(1);\
- stk->type = STK_SAVE_VAL;\
- stk->zid = (sid);\
- stk->u.val.type = (stype);\
- stk->u.val.v = (UChar* )(sval);\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\
- STACK_ENSURE(1);\
- stk->type = STK_SAVE_VAL;\
- stk->zid = (sid);\
- stk->u.val.type = (stype);\
- stk->u.val.v = (UChar* )(sval);\
- stk->u.val.v2 = sprev;\
- STACK_INC;\
-} while(0)
-
-#define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) do {\
- StackType *k = stk;\
- while (k > stk_base) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \
- if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) {\
- (sval) = k->u.val.v;\
- break;\
- }\
- }\
-} while (0)
-
-#define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval) do { \
- int level = 0;\
- StackType *k = stk;\
- while (k > stk_base) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
- if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
- && k->zid == (sid)) {\
- if (level == 0) {\
- (sval) = k->u.val.v;\
- break;\
- }\
- }\
- else if (k->type == STK_CALL_FRAME)\
- level--;\
- else if (k->type == STK_RETURN)\
- level++;\
- }\
-} while (0)
-
-#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval) do { \
- int level = 0;\
- StackType *k = stk;\
- while (k > stk_base) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \
- if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
- && k->zid == (sid)) {\
- if (level == 0) {\
- (sval) = k->u.val.v;\
- sprev = k->u.val.v2;\
- break;\
- }\
- }\
- else if (k->type == STK_CALL_FRAME)\
- level--;\
- else if (k->type == STK_RETURN)\
- level++;\
- }\
-} while (0)
-
-#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM(stype, sid, sval, stk_from) do { \
- int level = 0;\
- StackType *k = (stk_from);\
- while (k > stk_base) {\
- STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM"); \
- if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\
- && k->u.val.id == (sid)) {\
- if (level == 0) {\
- (sval) = k->u.val.v;\
- break;\
- }\
- }\
- else if (k->type == STK_CALL_FRAME)\
- level--;\
- else if (k->type == STK_RETURN)\
- level++;\
- k--;\
- }\
-} while (0)
-
-#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\
- STACK_ENSURE(1);\
- stk->type = STK_CALLOUT;\
- stk->zid = ONIG_NON_NAME_ID;\
- stk->u.callout.num = (anum);\
- stk->u.callout.func = (func);\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\
- STACK_ENSURE(1);\
- stk->type = STK_CALLOUT;\
- stk->zid = (aid);\
- stk->u.callout.num = (anum);\
- stk->u.callout.func = (func);\
- STACK_INC;\
-} while(0)
-
-#ifdef ONIG_DEBUG
-#define STACK_BASE_CHECK(p, at) \
- if ((p) < stk_base) {\
- fprintf(stderr, "at %s\n", at);\
- goto stack_error;\
- }
-#else
-#define STACK_BASE_CHECK(p, at)
-#endif
-
-#define STACK_POP_ONE do {\
- stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
-} while(0)
-
-
-#ifdef USE_CALLOUT
-#define POP_CALLOUT_CASE \
- else if (stk->type == STK_CALLOUT) {\
- RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\
- }
-#else
-#define POP_CALLOUT_CASE
-#endif
-
-#define STACK_POP do {\
- switch (pop_level) {\
- case STACK_POP_LEVEL_FREE:\
- while (1) {\
- stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP"); \
- if ((stk->type & STK_MASK_POP_USED) != 0) break;\
- }\
- break;\
- case STACK_POP_LEVEL_MEM_START:\
- while (1) {\
- stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP 2"); \
- if ((stk->type & STK_MASK_POP_USED) != 0) break;\
- else if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
- mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
- }\
- }\
- break;\
- default:\
- while (1) {\
- stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP 3"); \
- if ((stk->type & STK_MASK_POP_USED) != 0) break;\
- else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\
- if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
- mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
- }\
- else if (stk->type == STK_REPEAT_INC) {\
- STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
- }\
- else if (stk->type == STK_MEM_END) {\
- mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
- mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
- }\
- POP_CALLOUT_CASE\
- }\
- }\
- break;\
- }\
-} while(0)
-
-#define POP_TIL_BODY(aname, til_type) do {\
- while (1) {\
- stk--;\
- STACK_BASE_CHECK(stk, (aname));\
- if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\
- if (stk->type == (til_type)) break;\
- else {\
- if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
- mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
- }\
- else if (stk->type == STK_REPEAT_INC) {\
- STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
- }\
- else if (stk->type == STK_MEM_END) {\
- mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
- mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
- }\
- /* Don't call callout here because negation of total success by (?!..) (?<!..) */\
- }\
- }\
- }\
-} while(0)
-
-#define STACK_POP_TIL_ALT_PREC_READ_NOT do {\
- POP_TIL_BODY("STACK_POP_TIL_ALT_PREC_READ_NOT", STK_ALT_PREC_READ_NOT);\
-} while(0)
-
-#define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT do {\
- POP_TIL_BODY("STACK_POP_TIL_ALT_LOOK_BEHIND_NOT", STK_ALT_LOOK_BEHIND_NOT);\
-} while(0)
-
-
-#define STACK_EXEC_TO_VOID(k) do {\
- k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \
- if (IS_TO_VOID_TARGET(k)) {\
- if (k->type == STK_TO_VOID_START) {\
- k->type = STK_VOID;\
- break;\
- }\
- k->type = STK_VOID;\
- }\
- }\
-} while(0)
-
-#define STACK_GET_PREC_READ_START(k) do {\
- int level = 0;\
- k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_GET_PREC_READ_START");\
- if (IS_TO_VOID_TARGET(k)) {\
- k->type = STK_VOID;\
- }\
- else if (k->type == STK_PREC_READ_START) {\
- if (level == 0) {\
- break;\
- }\
- level--;\
- }\
- else if (k->type == STK_PREC_READ_END) {\
- level++;\
- }\
- }\
-} while(0)
-
-#define STACK_EMPTY_CHECK(isnull,sid,s) do {\
- StackType* k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \
- if (k->type == STK_EMPTY_CHECK_START) {\
- if (k->zid == (sid)) {\
- (isnull) = (k->u.empty_check.pstr == (s));\
- break;\
- }\
- }\
- }\
-} while(0)
-
-#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
- if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\
- (addr) = 0;\
- }\
- else {\
- if (MEM_STATUS_AT((reg)->bt_mem_end, k->zid))\
- (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\
- else\
- (addr) = (UChar* )k->u.mem.prev_end;\
- }\
-} while (0)
-
-#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
-#define STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg) do {\
- StackType* k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM"); \
- if (k->type == STK_EMPTY_CHECK_START) {\
- if (k->zid == (sid)) {\
- if (k->u.empty_check.pstr != (s)) {\
- (isnull) = 0;\
- break;\
- }\
- else {\
- UChar* endp;\
- int level = 0;\
- (isnull) = 1;\
- while (k < stk) {\
- if (k->type == STK_MEM_START && level == 0) {\
- STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
- if (endp == 0) {\
- (isnull) = 0; break;\
- }\
- else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
- (isnull) = 0; break;\
- }\
- else if (endp != s) {\
- (isnull) = -1; /* empty, but position changed */ \
- }\
- }\
- else if (k->type == STK_PREC_READ_START) {\
- level++;\
- }\
- else if (k->type == STK_PREC_READ_END) {\
- level--;\
- }\
- k++;\
- }\
- break;\
- }\
- }\
- }\
- }\
-} while(0)
-
-#define STACK_EMPTY_CHECK_MEM_REC(isnull,sid,s,reg) do {\
- int level = 0;\
- StackType* k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM_REC");\
- if (k->type == STK_EMPTY_CHECK_START) {\
- if (k->zid == (sid)) {\
- if (level == 0) {\
- if (k->u.empty_check.pstr != (s)) {\
- (isnull) = 0;\
- break;\
- }\
- else {\
- UChar* endp;\
- int prec_level = 0;\
- (isnull) = 1;\
- while (k < stk) {\
- if (k->type == STK_MEM_START) {\
- if (level == 0 && prec_level == 0) {\
- STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
- if (endp == 0) {\
- (isnull) = 0; break;\
- }\
- else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \
- (isnull) = 0; break;\
- }\
- else if (endp != s) {\
- (isnull) = -1; /* empty, but position changed */\
- }\
- }\
- }\
- else if (k->type == STK_EMPTY_CHECK_START) {\
- if (k->zid == (sid)) level++;\
- }\
- else if (k->type == STK_EMPTY_CHECK_END) {\
- if (k->zid == (sid)) level--;\
- }\
- else if (k->type == STK_PREC_READ_START) {\
- prec_level++;\
- }\
- else if (k->type == STK_PREC_READ_END) {\
- prec_level--;\
- }\
- k++;\
- }\
- break;\
- }\
- }\
- else {\
- level--;\
- }\
- }\
- }\
- else if (k->type == STK_EMPTY_CHECK_END) {\
- if (k->zid == (sid)) level++;\
- }\
- }\
-} while(0)
-#else
-#define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\
- int level = 0;\
- StackType* k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \
- if (k->type == STK_EMPTY_CHECK_START) {\
- if (k->u.empty_check.num == (id)) {\
- if (level == 0) {\
- (isnull) = (k->u.empty_check.pstr == (s));\
- break;\
- }\
- }\
- level--;\
- }\
- else if (k->type == STK_EMPTY_CHECK_END) {\
- level++;\
- }\
- }\
-} while(0)
-#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
-
-#define STACK_GET_REPEAT(sid, k) do {\
- int level = 0;\
- k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
- if (k->type == STK_REPEAT) {\
- if (level == 0) {\
- if (k->zid == (sid)) {\
- break;\
- }\
- }\
- }\
- else if (k->type == STK_CALL_FRAME) level--;\
- else if (k->type == STK_RETURN) level++;\
- }\
-} while(0)
-
-/*
- * STACK_RETURN(addr)
- *
- * Scans downward from stk (enclosing scope) for the STK_CALL_FRAME entry
- * that is reached while the local nesting counter is 0 and copies its
- * u.call_frame.ret_addr into addr.
- * Each STK_RETURN entry passed on the way raises the counter; each
- * STK_CALL_FRAME met at a non-zero counter lowers it, so already
- * completed call/return pairs are skipped.
- * STACK_BASE_CHECK is invoked on every step.
- */
-#define STACK_RETURN(addr) do {\
- int level = 0;\
- StackType* k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_RETURN"); \
- if (k->type == STK_CALL_FRAME) {\
- if (level == 0) {\
- (addr) = k->u.call_frame.ret_addr;\
- break;\
- }\
- else level--;\
- }\
- else if (k->type == STK_RETURN)\
- level++;\
- }\
-} while(0)
-
-
-/*
- * STRING_CMP(s1, s2, len)
- *
- * Compares len elements at s1 and s2 one by one and jumps to the `fail`
- * label of the enclosing function on the first difference.
- * All three arguments are modified in place (s1/s2 are advanced, len is
- * counted down), so they must be plain lvalues; after a full match s1 and
- * s2 point just past the compared range.
- */
-#define STRING_CMP(s1,s2,len) do {\
- while (len-- > 0) {\
- if (*s1++ != *s2++) goto fail;\
- }\
-} while(0)
-
-/*
- * STRING_CMP_IC(case_fold_flag, s1, ps2, len)
- *
- * Case-insensitive counterpart of STRING_CMP: delegates to
- * string_cmp_ic() using `encode` from the enclosing scope and jumps to
- * the `fail` label when that function returns 0.
- * ps2 is a pointer to the second string pointer; string_cmp_ic() updates
- * it only on success.
- */
-#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
- if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
- goto fail; \
-} while(0)
-
-/*
- * Case-insensitive comparison of two byte ranges of equal length.
- *
- * enc            encoding used for case folding
- * case_fold_flag folding options handed to ONIGENC_MBC_CASE_FOLD
- * s1             start of the first range
- * ps2            in: start of the second range;
- *                out: position reached in the second range (written on
- *                success only)
- * mblen          length in bytes of both ranges
- *
- * Both ranges are folded unit by unit; the folded lengths and the folded
- * bytes must agree for every unit.
- * Returns 1 when the ranges are equal under folding, 0 otherwise.
- */
-static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
-                         UChar* s1, UChar** ps2, int mblen)
-{
-  UChar folded1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
-  UChar folded2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
-  UChar* cur2   = *ps2;
-  UChar* limit1 = s1 + mblen;
-  UChar* limit2 = cur2 + mblen;
-
-  while (s1 < limit1) {
-    int i;
-    int n1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, limit1, folded1);
-    int n2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &cur2, limit2, folded2);
-
-    /* A different folded length already means the units differ. */
-    if (n1 != n2) return 0;
-
-    for (i = 0; i < n1; i++) {
-      if (folded1[i] != folded2[i]) return 0;
-    }
-  }
-
-  /* Report how far the second range was consumed. */
-  *ps2 = cur2;
-  return 1;
-}
-
-/*
- * STRING_CMP_VALUE(s1, s2, len, is_fail)
- *
- * Same element-wise comparison as STRING_CMP, but instead of jumping to
- * `fail` it reports the outcome through is_fail: 0 when all len elements
- * matched, 1 on the first mismatch.
- * s1, s2 and len are modified in place and must be plain lvalues.
- */
-#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
- is_fail = 0;\
- while (len-- > 0) {\
- if (*s1++ != *s2++) {\
- is_fail = 1; break;\
- }\
- }\
-} while(0)
-
-/*
- * STRING_CMP_VALUE_IC(case_fold_flag, s1, ps2, len, is_fail)
- *
- * Case-insensitive counterpart of STRING_CMP_VALUE: is_fail becomes 1
- * when string_cmp_ic() (called with `encode` from the enclosing scope)
- * returns 0, and 0 otherwise.
- */
-#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
- if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
- is_fail = 1; \
- else \
- is_fail = 0; \
-} while(0)
-
-
-/*
- * Position helpers.  They read str, end, s, right_range and
- * in_right_range from the enclosing function rather than taking them as
- * arguments.
- */
-/* True when the subject string has zero length. */
-#define IS_EMPTY_STR (str == end)
-/* True when s is at the first / one-past-last position of the subject. */
-#define ON_STR_BEGIN(s) ((s) == str)
-#define ON_STR_END(s) ((s) == end)
-/* True when at least one more unit is available before right_range. */
-#define DATA_ENSURE_CHECK1 (s < right_range)
-/* True when n more units are available before right_range. */
-#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
-/*
- * Jumps to `fail` unless n more units are available.
- * NOTE(review): expands to a bare `if` statement, so it must not be used
- * directly in front of an `else`.
- */
-#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
-
-/* Initialises right_range from the caller-supplied in_right_range. */
-#define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range
-
-#ifdef USE_CAPTURE_HISTORY
-/*
- * Builds the capture-history subtree of `node` from the match stack.
- *
- * node     tree node whose children are collected
- * kp       in: first stack entry to examine; out: the STK_MEM_END entry
- *          that closed `node` (written only when 0 is returned)
- * stk_top  one past the last stack entry to examine
- * str      start of the subject string; offsets are stored relative to it
- * reg      compiled pattern; reg->capture_history selects the groups that
- *          are recorded
- *
- * For every STK_MEM_START of a recorded group a child node is created,
- * attached to `node`, and filled recursively from the following entries.
- *
- * Returns 0 when the STK_MEM_END matching node->group was found,
- * 1 when stk_top was reached first (root node ending), or the non-zero
- * value reported by a failed allocation / history_tree_add_child().
- */
-static int
-make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
-                          StackType* stk_top, UChar* str, regex_t* reg)
-{
-  int n, r;
-  OnigCaptureTreeNode* child;
-  StackType* k = *kp;
-
-  while (k < stk_top) {
-    if (k->type == STK_MEM_START) {
-      n = k->zid;
-      if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
-          MEM_STATUS_AT(reg->capture_history, n) != 0) {
-        child = history_node_new();
-        CHECK_NULL_RETURN_MEMERR(child);
-        child->group = n;
-        child->beg = (int )(k->u.mem.pstr - str);
-        r = history_tree_add_child(node, child);
-        if (r != 0) {
-          /* child was not attached to the tree, so nobody else owns it:
-             release it here instead of leaking it. */
-          history_tree_free(child);
-          return r;
-        }
-        *kp = (k + 1);
-        r = make_capture_history_tree(child, kp, stk_top, str, reg);
-        if (r != 0) return r;
-
-        /* The recursive call left *kp on the entry that closed child. */
-        k = *kp;
-        child->end = (int )(k->u.mem.pstr - str);
-      }
-    }
-    else if (k->type == STK_MEM_END) {
-      if (k->zid == node->group) {
-        node->end = (int )(k->u.mem.pstr - str);
-        *kp = k;
-        return 0;
-      }
-    }
-    k++;
-  }
-
-  return 1; /* 1: root node ending. */
-}
-#endif
-
-#ifdef USE_BACKREF_WITH_LEVEL
-/*
- * Returns 1 when group number `mem` occurs among the first `num` entries
- * of the array `memp`, 0 otherwise.
- */
-static int mem_is_in_memp(int mem, int num, MemNumType* memp)
-{
-  int idx = 0;
-
-  while (idx < num) {
-    if ((int )memp[idx] == mem) return 1;
-    idx++;
-  }
-  return 0;
-}
-
-/*
- * Matches a back reference against a capture recorded at call-nesting
- * level `nest`.
- *
- * The stack is scanned from just below `top` down to `stk_base`.
- * STK_RETURN raises and STK_CALL_FRAME lowers the current level; only
- * entries seen while the level equals `nest` are considered.  The end
- * position of a listed group (memp[0..mem_num-1]) is remembered from its
- * STK_MEM_END entry; the first STK_MEM_START of a listed group found
- * after that decides the result.
- *
- * ignore_case     non-zero: compare through string_cmp_ic()
- * case_fold_flag  folding options for the case-insensitive comparison
- * s               in: current subject position; out: position after the
- *                 matched text (written on success only)
- * send            end of the subject data that may be consumed
- *
- * Returns 1 on a successful match, 0 otherwise.
- */
-static int
-backref_match_at_nested_level(regex_t* reg,
-                              StackType* top, StackType* stk_base,
-                              int ignore_case, int case_fold_flag,
-                              int nest, int mem_num, MemNumType* memp,
-                              UChar** s, const UChar* send)
-{
-  UChar* group_end = NULL_UCHARP;
-  int depth = 0;
-  StackType* ent;
-
-  for (ent = top - 1; ent >= stk_base; ent--) {
-    UChar *group_beg, *subj;
-
-    if (ent->type == STK_CALL_FRAME) {
-      depth--;
-      continue;
-    }
-    if (ent->type == STK_RETURN) {
-      depth++;
-      continue;
-    }
-    if (depth != nest) continue;
-
-    if (ent->type == STK_MEM_START) {
-      if (! mem_is_in_memp(ent->zid, mem_num, memp)) continue;
-      /* No end position recorded yet: keep scanning. */
-      if (IS_NULL(group_end)) continue;
-
-      group_beg = ent->u.mem.pstr;
-      /* Captured text is longer than what is left of the subject. */
-      if (group_end - group_beg > send - *s) return 0; /* or goto next_mem; */
-
-      subj = *s;
-      if (ignore_case != 0) {
-        if (string_cmp_ic(reg->enc, case_fold_flag, group_beg, &subj,
-                          (int )(group_end - group_beg)) == 0)
-          return 0; /* or goto next_mem; */
-      }
-      else {
-        UChar* g;
-
-        for (g = group_beg; g < group_end; g++, subj++) {
-          if (*g != *subj) return 0; /* or goto next_mem; */
-        }
-      }
-
-      *s = subj;
-      return 1;
-    }
-    else if (ent->type == STK_MEM_END) {
-      if (mem_is_in_memp(ent->zid, mem_num, memp))
-        group_end = ent->u.mem.pstr;
-    }
-  }
-
-  return 0;
-}
-
-/*
- * Tells whether one of the groups memp[0..mem_num-1] has a STK_MEM_END
- * entry recorded at call-nesting level `nest`.
- *
- * The stack is scanned from just below `top` down to `stk_base`;
- * STK_RETURN raises and STK_CALL_FRAME lowers the current level.
- * `reg` is not used by the body.
- *
- * Returns 1 when such an entry exists, 0 otherwise.
- */
-static int
-backref_check_at_nested_level(regex_t* reg,
-                              StackType* top, StackType* stk_base,
-                              int nest, int mem_num, MemNumType* memp)
-{
-  int depth = 0;
-  StackType* ent;
-
-  for (ent = top - 1; ent >= stk_base; ent--) {
-    if (ent->type == STK_CALL_FRAME) {
-      depth--;
-      continue;
-    }
-    if (ent->type == STK_RETURN) {
-      depth++;
-      continue;
-    }
-    if (depth != nest) continue;
-
-    if (ent->type == STK_MEM_END &&
-        mem_is_in_memp(ent->zid, mem_num, memp)) {
-      return 1;
-    }
-  }
-
-  return 0;
-}
-#endif /* USE_BACKREF_WITH_LEVEL */
-
-
-#ifdef ONIG_DEBUG_STATISTICS
-
-/*
- * Timing primitives for the per-opcode statistics.
- * USE_TIMEOFDAY is defined unconditionally here, so the gettimeofday()
- * branch below is the one that gets compiled; the times() branch is the
- * alternative if that define is removed.
- */
-#define USE_TIMEOFDAY
-
-#ifdef USE_TIMEOFDAY
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-/* ts: time at opcode entry, te: time at opcode exit (see SOP_IN/SOP_OUT). */
-static struct timeval ts, te;
-#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
-/* Elapsed time between two timeval samples, in microseconds. */
-#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
- (((te).tv_sec - (ts).tv_sec)*1000000))
-#else
-#ifdef HAVE_SYS_TIMES_H
-#include <sys/times.h>
-#endif
-static struct tms ts, te;
-#define GETTIME(t) times(&(t))
-/* Difference of the tms_utime fields of two times() samples. */
-#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
-#endif
-
-/* Per-opcode statistics, indexed by opcode value. */
-static int OpCounter[256];            /* times each opcode was entered */
-static int OpPrevCounter[256];        /* times each opcode directly preceded OpPrevTarget */
-static unsigned long OpTime[256];     /* accumulated TIMEDIFF per opcode */
-static int OpCurr = OP_FINISH;        /* opcode currently being executed */
-static int OpPrevTarget = OP_FAIL;    /* opcode whose predecessors are counted */
-static int MaxStackDepth = 0;         /* largest stk - stk_base seen by STACK_INC */
-
-/*
- * SOP_IN(opcode): called on entry to an opcode handler.  If the new
- * opcode is OpPrevTarget, the previously running opcode gets a hit in
- * OpPrevCounter; then the entry counter is bumped and the start time is
- * sampled into ts.
- */
-#define SOP_IN(opcode) do {\
- if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
- OpCurr = opcode;\
- OpCounter[opcode]++;\
- GETTIME(ts);\
-} while(0)
-
-/*
- * SOP_OUT: called when an opcode handler is left.  Samples the end time
- * into te and adds the elapsed time to the running opcode's total.
- */
-#define SOP_OUT do {\
- GETTIME(te);\
- OpTime[OpCurr] += TIMEDIFF(te, ts);\
-} while(0)
-
-/*
- * Resets all per-opcode statistics (entry counts, predecessor counts and
- * accumulated times) as well as the recorded maximum stack depth.
- */
-extern void
-onig_statistics_init(void)
-{
-  int op;
-
-  for (op = 0; op < 256; op++) {
-    OpCounter[op]     = 0;
-    OpPrevCounter[op] = 0;
-    OpTime[op]        = 0;
-  }
-
-  MaxStackDepth = 0;
-}
-
-/*
- * Writes the collected per-opcode statistics to `f`: a header line, one
- * line per OpInfo entry (entry count, predecessor count, accumulated
- * time, opcode name) up to the entry whose opcode is negative, and
- * finally the maximum stack depth.
- *
- * Returns 0 on success, -1 as soon as an fprintf() call fails.
- *
- * NOTE(review): the counters are indexed with the OpInfo table index,
- * which is only correct if OpInfo[i].opcode == i for every row -- confirm
- * against the OpInfo definition.
- */
-extern int
-onig_print_statistics(FILE* f)
-{
-  int r;
-  int i;
-
-  r = fprintf(f, " count prev time\n");
-  if (r < 0) return -1;
-
-  for (i = 0; OpInfo[i].opcode >= 0; i++) {
-    /* OpTime[] is unsigned long, so it is printed with %lu (was %ld). */
-    r = fprintf(f, "%8d: %8d: %10lu: %s\n",
-                OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name);
-    if (r < 0) return -1;
-  }
-  r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
-  if (r < 0) return -1;
-
-  return 0;
-}
-
-/*
- * STACK_INC (statistics build): advances the stack top and keeps
- * MaxStackDepth at the largest distance between stk and stk_base seen
- * so far.  stk and stk_base come from the enclosing function.
- */
-#define STACK_INC do {\
- stk++;\
- if (stk - stk_base > MaxStackDepth) \
- MaxStackDepth = stk - stk_base;\
-} while(0)
-
-#else
-/* Non-statistics build: STACK_INC only advances the stack top. */
-#define STACK_INC stk++
-
-/* Non-statistics build: the opcode entry/exit hooks expand to nothing. */
-#define SOP_IN(opcode)
-#define SOP_OUT
-#endif
-
-
-/* matching region of POSIX API */
-/* Offset type used for the POSIX-style match positions below. */
-typedef int regoff_t;
-
-/*
- * One POSIX-style match slot.  match_at() fills it with offsets measured
- * from the start of the subject string, or sets both fields to
- * ONIG_REGION_NOTPOS for a group that did not take part in the match.
- */
-typedef struct {
- regoff_t rm_so; /* offset of the first unit of the match */
- regoff_t rm_eo; /* offset just past the last unit of the match */
-} posix_regmatch_t;
-
-
-
-#ifdef USE_THREADED_CODE
-
-/*
- * Dispatch macros, threaded-code flavour: every opcode handler is a
- * label L_<name> and control moves between handlers with a computed
- * `goto *`.  All of them rely on p, s, sbegin and sprev of the enclosing
- * function.
- */
-/* Enter the interpreter by dispatching on the first operation. */
-#define BYTECODE_INTERPRETER_START GOTO_OP;
-#define BYTECODE_INTERPRETER_END
-/* Handler prologue: label, statistics hook, remember the start position,
-   optional debug trace. */
-#define CASE_OP(x) L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(0)
-#define DEFAULT_OP /* L_DEFAULT: */
-/* Continue with the next operation after recording sbegin as sprev. */
-#define NEXT_OP sprev = sbegin; JUMP_OP
-/* Continue with the operation at p without touching sprev. */
-#define JUMP_OP GOTO_OP
-#ifdef USE_DIRECT_THREADED_CODE
-/* The operation itself stores the handler address. */
-#define GOTO_OP goto *(p->opaddr)
-#else
-/* The handler address is looked up by opcode. */
-#define GOTO_OP goto *opcode_to_label[p->opcode]
-#endif
-#define BREAK_OP /* Nothing */
-
-#else
-
-/*
- * Dispatch macros, portable flavour: the interpreter is an endless loop
- * around a switch on p->opcode.  At the top of every iteration the
- * optional debug trace is emitted and the current position is saved in
- * sbegin.
- */
-#define BYTECODE_INTERPRETER_START \
- while (1) {\
- MATCH_DEBUG_OUT(0)\
- sbegin = s;\
- switch (p->opcode) {
-/* Closes the switch, records sbegin as sprev, then closes the loop. */
-#define BYTECODE_INTERPRETER_END } sprev = sbegin; }
-/* Handler prologue: case label plus statistics hook. */
-#define CASE_OP(x) case OP_##x: SOP_IN(OP_##x);
-#define DEFAULT_OP default:
-/* Leaves the switch, so the `sprev = sbegin` of
-   BYTECODE_INTERPRETER_END runs before the next iteration. */
-#define NEXT_OP break
-#define JUMP_OP GOTO_OP
-/* Restarts the loop directly, skipping the sprev update; the trailing
-   `break` is never reached. */
-#define GOTO_OP continue; break
-#define BREAK_OP break
-
-#endif /* USE_THREADED_CODE */
-
-/* Advance the operation pointer to the following operation. */
-#define INC_OP p++
-/* Handler epilogues: run the statistics exit hook, then leave the handler
-   via NEXT_OP, JUMP_OP or BREAK_OP respectively. */
-#define NEXT_OUT SOP_OUT; NEXT_OP
-#define JUMP_OUT SOP_OUT; JUMP_OP
-#define BREAK_OUT SOP_OUT; BREAK_OP
-/* Like JUMP_OUT, with CHECK_INTERRUPT_IN_MATCH expanded before the jump. */
-#define CHECK_INTERRUPT_JUMP_OUT SOP_OUT; CHECK_INTERRUPT_IN_MATCH; JUMP_OP
-
-
-#ifdef ONIG_DEBUG_MATCH
-/*
- * MATCH_DEBUG_OUT(offset) -- debug build only.
- *
- * Prints one trace line to stderr for the operation at (p - offset):
- * a running counter, the current stack index, the offset of s in the
- * subject (-1 when s is NULL), a quoted preview of up to 7 characters
- * starting at s (followed by "..." when more data remains), padding, and
- * then either "----: finish" for FinishCode or the operation index and
- * its disassembly.
- *
- * Uses counter, i, s, str, end, p, stk, reg, encode and FinishCode from
- * the enclosing function.  The expansion already ends with ';' because
- * the dispatch macros invoke it without one.
- */
-#define MATCH_DEBUG_OUT(offset) do {\
- Operation *xp;\
- UChar *q, *bp, buf[50];\
- int len, spos;\
- spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\
- xp = p - (offset);\
- fprintf(stderr, "%7u: %7ld: %4d> \"",\
- counter, GET_STACK_INDEX(stk), spos);\
- counter++;\
- bp = buf;\
- if (IS_NOT_NULL(s)) {\
- for (i = 0, q = s; i < 7 && q < end; i++) {\
- len = enclen(encode, q);\
- while (len-- > 0) *bp++ = *q++;\
- }\
- if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }\
- else { xmemcpy(bp, "\"", 1); bp += 1; }\
- }\
- else {\
- xmemcpy(bp, "\"", 1); bp += 1;\
- }\
- *bp = 0;\
- fputs((char* )buf, stderr);\
- for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);\
- if (xp == FinishCode)\
- fprintf(stderr, "----: finish");\
- else {\
- fprintf(stderr, "%4d: ", (int )(xp - reg->ops));\
- print_compiled_byte_code(stderr, reg, (int )(xp - reg->ops), reg->ops, encode);\
- }\
- fprintf(stderr, "\n");\
- } while(0);
-#else
-#define MATCH_DEBUG_OUT(offset)
-#endif
-
-
-/* match data(str - end) from position (sstart). */
-/* if sstart == str then set sprev to NULL. */
-static int
-match_at(regex_t* reg, const UChar* str, const UChar* end,
- const UChar* in_right_range, const UChar* sstart, UChar* sprev,
- MatchArg* msa)
-{
-
-#if defined(USE_DIRECT_THREADED_CODE)
- static Operation FinishCode[] = { { .opaddr=&&L_FINISH } };
-#else
- static Operation FinishCode[] = { { OP_FINISH } };
-#endif
-
-#ifdef USE_THREADED_CODE
- static const void *opcode_to_label[] = {
- &&L_FINISH,
- &&L_END,
- &&L_EXACT1,
- &&L_EXACT2,
- &&L_EXACT3,
- &&L_EXACT4,
- &&L_EXACT5,
- &&L_EXACTN,
- &&L_EXACTMB2N1,
- &&L_EXACTMB2N2,
- &&L_EXACTMB2N3,
- &&L_EXACTMB2N,
- &&L_EXACTMB3N,
- &&L_EXACTMBN,
- &&L_EXACT1_IC,
- &&L_EXACTN_IC,
- &&L_CCLASS,
- &&L_CCLASS_MB,
- &&L_CCLASS_MIX,
- &&L_CCLASS_NOT,
- &&L_CCLASS_MB_NOT,
- &&L_CCLASS_MIX_NOT,
- &&L_ANYCHAR,
- &&L_ANYCHAR_ML,
- &&L_ANYCHAR_STAR,
- &&L_ANYCHAR_ML_STAR,
- &&L_ANYCHAR_STAR_PEEK_NEXT,
- &&L_ANYCHAR_ML_STAR_PEEK_NEXT,
- &&L_WORD,
- &&L_WORD_ASCII,
- &&L_NO_WORD,
- &&L_NO_WORD_ASCII,
- &&L_WORD_BOUNDARY,
- &&L_NO_WORD_BOUNDARY,
- &&L_WORD_BEGIN,
- &&L_WORD_END,
- &&L_TEXT_SEGMENT_BOUNDARY,
- &&L_BEGIN_BUF,
- &&L_END_BUF,
- &&L_BEGIN_LINE,
- &&L_END_LINE,
- &&L_SEMI_END_BUF,
- &&L_BEGIN_POSITION,
- &&L_BACKREF1,
- &&L_BACKREF2,
- &&L_BACKREF_N,
- &&L_BACKREF_N_IC,
- &&L_BACKREF_MULTI,
- &&L_BACKREF_MULTI_IC,
- &&L_BACKREF_WITH_LEVEL,
- &&L_BACKREF_WITH_LEVEL_IC,
- &&L_BACKREF_CHECK,
- &&L_BACKREF_CHECK_WITH_LEVEL,
- &&L_MEMORY_START,
- &&L_MEMORY_START_PUSH,
- &&L_MEMORY_END_PUSH,
- &&L_MEMORY_END_PUSH_REC,
- &&L_MEMORY_END,
- &&L_MEMORY_END_REC,
- &&L_FAIL,
- &&L_JUMP,
- &&L_PUSH,
- &&L_PUSH_SUPER,
- &&L_POP_OUT,
-#ifdef USE_OP_PUSH_OR_JUMP_EXACT
- &&L_PUSH_OR_JUMP_EXACT1,
-#endif
- &&L_PUSH_IF_PEEK_NEXT,
- &&L_REPEAT,
- &&L_REPEAT_NG,
- &&L_REPEAT_INC,
- &&L_REPEAT_INC_NG,
- &&L_REPEAT_INC_SG,
- &&L_REPEAT_INC_NG_SG,
- &&L_EMPTY_CHECK_START,
- &&L_EMPTY_CHECK_END,
- &&L_EMPTY_CHECK_END_MEMST,
- &&L_EMPTY_CHECK_END_MEMST_PUSH,
- &&L_PREC_READ_START,
- &&L_PREC_READ_END,
- &&L_PREC_READ_NOT_START,
- &&L_PREC_READ_NOT_END,
- &&L_ATOMIC_START,
- &&L_ATOMIC_END,
- &&L_LOOK_BEHIND,
- &&L_LOOK_BEHIND_NOT_START,
- &&L_LOOK_BEHIND_NOT_END,
- &&L_CALL,
- &&L_RETURN,
- &&L_PUSH_SAVE_VAL,
- &&L_UPDATE_VAR,
-#ifdef USE_CALLOUT
- &&L_CALLOUT_CONTENTS,
- &&L_CALLOUT_NAME,
-#endif
- };
-#endif
-
- int i, n, num_mem, best_len, pop_level;
- LengthType tlen, tlen2;
- MemNumType mem;
- RelAddrType addr;
- UChar *s, *q, *ps, *sbegin;
- UChar *right_range;
- int is_alloca;
- char *alloc_base;
- StackType *stk_base, *stk, *stk_end;
- StackType *stkp; /* used as any purpose. */
- StackIndex si;
- StackIndex *repeat_stk;
- StackIndex *mem_start_stk, *mem_end_stk;
- UChar* keep;
-#ifdef USE_RETRY_LIMIT_IN_MATCH
- unsigned long retry_limit_in_match;
- unsigned long retry_in_match_counter;
-#endif
-
-#ifdef USE_CALLOUT
- int of;
-#endif
-
- Operation* p = reg->ops;
- OnigOptionType option = reg->options;
- OnigEncoding encode = reg->enc;
- OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
-
-#ifdef ONIG_DEBUG_MATCH
- static unsigned int counter = 1;
-#endif
-
-#ifdef USE_DIRECT_THREADED_CODE
- if (IS_NULL(msa)) {
- for (i = 0; i < reg->ops_used; i++) {
- const void* addr;
- addr = opcode_to_label[reg->ocs[i]];
- p->opaddr = addr;
- p++;
- }
- return ONIG_NORMAL;
- }
-#endif
-
-#ifdef USE_CALLOUT
- msa->mp->match_at_call_counter++;
-#endif
-
-#ifdef USE_RETRY_LIMIT_IN_MATCH
- retry_limit_in_match = msa->retry_limit_in_match;
-#endif
-
- pop_level = reg->stack_pop_level;
- num_mem = reg->num_mem;
- STACK_INIT(INIT_MATCH_STACK_SIZE);
- UPDATE_FOR_STACK_REALLOC;
- for (i = 1; i <= num_mem; i++) {
- mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
- }
-
-#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
- str, end, sstart, sprev);
- fprintf(stderr, "size: %d, start offset: %d\n",
- (int )(end - str), (int )(sstart - str));
-#endif
-
- best_len = ONIG_MISMATCH;
- keep = s = (UChar* )sstart;
- STACK_PUSH_BOTTOM(STK_ALT, FinishCode); /* bottom stack */
- INIT_RIGHT_RANGE;
-
-#ifdef USE_RETRY_LIMIT_IN_MATCH
- retry_in_match_counter = 0;
-#endif
-
- BYTECODE_INTERPRETER_START {
- CASE_OP(END)
- n = (int )(s - sstart);
- if (n > best_len) {
- OnigRegion* region;
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
- if (IS_FIND_LONGEST(option)) {
- if (n > msa->best_len) {
- msa->best_len = n;
- msa->best_s = (UChar* )sstart;
- }
- else
- goto end_best_len;
- }
-#endif
- best_len = n;
- region = msa->region;
- if (region) {
- if (keep > s) keep = s;
-
-#ifdef USE_POSIX_API_REGION_OPTION
- if (IS_POSIX_REGION(msa->options)) {
- posix_regmatch_t* rmt = (posix_regmatch_t* )region;
-
- rmt[0].rm_so = (regoff_t )(keep - str);
- rmt[0].rm_eo = (regoff_t )(s - str);
- for (i = 1; i <= num_mem; i++) {
- if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (MEM_STATUS_AT(reg->bt_mem_start, i))
- rmt[i].rm_so = (regoff_t )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
- else
- rmt[i].rm_so = (regoff_t )((UChar* )((void* )(mem_start_stk[i])) - str);
-
- rmt[i].rm_eo = (regoff_t )((MEM_STATUS_AT(reg->bt_mem_end, i)
- ? STACK_AT(mem_end_stk[i])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[i]))
- - str);
- }
- else {
- rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
- }
- }
- }
- else {
-#endif /* USE_POSIX_API_REGION_OPTION */
- region->beg[0] = (int )(keep - str);
- region->end[0] = (int )(s - str);
- for (i = 1; i <= num_mem; i++) {
- if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (MEM_STATUS_AT(reg->bt_mem_start, i))
- region->beg[i] = (int )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
- else
- region->beg[i] = (int )((UChar* )((void* )mem_start_stk[i]) - str);
-
- region->end[i] = (int )((MEM_STATUS_AT(reg->bt_mem_end, i)
- ? STACK_AT(mem_end_stk[i])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[i])) - str);
- }
- else {
- region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
- }
- }
-
-#ifdef USE_CAPTURE_HISTORY
- if (reg->capture_history != 0) {
- int r;
- OnigCaptureTreeNode* node;
-
- if (IS_NULL(region->history_root)) {
- region->history_root = node = history_node_new();
- CHECK_NULL_RETURN_MEMERR(node);
- }
- else {
- node = region->history_root;
- history_tree_clear(node);
- }
-
- node->group = 0;
- node->beg = (int )(keep - str);
- node->end = (int )(s - str);
-
- stkp = stk_base;
- r = make_capture_history_tree(region->history_root, &stkp,
- stk, (UChar* )str, reg);
- if (r < 0) {
- best_len = r; /* error code */
- goto finish;
- }
- }
-#endif /* USE_CAPTURE_HISTORY */
-#ifdef USE_POSIX_API_REGION_OPTION
- } /* else IS_POSIX_REGION() */
-#endif
- } /* if (region) */
- } /* n > best_len */
-
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
- end_best_len:
-#endif
- SOP_OUT;
-
- if (IS_FIND_CONDITION(option)) {
- if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
- best_len = ONIG_MISMATCH;
- goto fail; /* for retry */
- }
- if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
- goto fail; /* for retry */
- }
- }
-
- /* default behavior: return first-matching result. */
- goto finish;
-
- CASE_OP(EXACT1)
- DATA_ENSURE(1);
- ps = p->exact.s;
- if (*ps != *s) goto fail;
- s++;
- INC_OP;
- NEXT_OUT;
-
- CASE_OP(EXACT1_IC)
- {
- int len;
- UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
-
- DATA_ENSURE(1);
- len = ONIGENC_MBC_CASE_FOLD(encode,
- /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
- case_fold_flag,
- &s, end, lowbuf);
- DATA_ENSURE(0);
- q = lowbuf;
- ps = p->exact.s;
- while (len-- > 0) {
- if (*ps != *q) goto fail;
- ps++; q++;
- }
- }
- INC_OP;
- NEXT_OUT;
-
- CASE_OP(EXACT2)
- DATA_ENSURE(2);
- ps = p->exact.s;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- sprev = s;
- s++;
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(EXACT3)
- DATA_ENSURE(3);
- ps = p->exact.s;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- sprev = s;
- s++;
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(EXACT4)
- DATA_ENSURE(4);
- ps = p->exact.s;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- sprev = s;
- s++;
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(EXACT5)
- DATA_ENSURE(5);
- ps = p->exact.s;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- sprev = s;
- s++;
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(EXACTN)
- tlen = p->exact_n.n;
- DATA_ENSURE(tlen);
- ps = p->exact_n.s;
- while (tlen-- > 0) {
- if (*ps++ != *s++) goto fail;
- }
- sprev = s - 1;
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(EXACTN_IC)
- {
- int len;
- UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
-
- tlen = p->exact_n.n;
- ps = p->exact_n.s;
- endp = ps + tlen;
- while (ps < endp) {
- sprev = s;
- DATA_ENSURE(1);
- len = ONIGENC_MBC_CASE_FOLD(encode,
- /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
- case_fold_flag,
- &s, end, lowbuf);
- DATA_ENSURE(0);
- q = lowbuf;
- while (len-- > 0) {
- if (*ps != *q) goto fail;
- ps++; q++;
- }
- }
- }
-
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(EXACTMB2N1)
- DATA_ENSURE(2);
- ps = p->exact.s;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- s++;
- INC_OP;
- NEXT_OUT;
-
- CASE_OP(EXACTMB2N2)
- DATA_ENSURE(4);
- ps = p->exact.s;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- ps++; s++;
- sprev = s;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- s++;
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(EXACTMB2N3)
- DATA_ENSURE(6);
- ps = p->exact.s;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- ps++; s++;
- sprev = s;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- ps++; s++;
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(EXACTMB2N)
- tlen = p->exact_n.n;
- DATA_ENSURE(tlen * 2);
- ps = p->exact_n.s;
- while (tlen-- > 0) {
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- ps++; s++;
- }
- sprev = s - 2;
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(EXACTMB3N)
- tlen = p->exact_n.n;
- DATA_ENSURE(tlen * 3);
- ps = p->exact_n.s;
- while (tlen-- > 0) {
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- ps++; s++;
- if (*ps != *s) goto fail;
- ps++; s++;
- }
- sprev = s - 3;
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(EXACTMBN)
- tlen = p->exact_len_n.len; /* mb byte len */
- tlen2 = p->exact_len_n.n; /* number of chars */
- tlen2 *= tlen;
- DATA_ENSURE(tlen2);
- ps = p->exact_len_n.s;
- while (tlen2-- > 0) {
- if (*ps != *s) goto fail;
- ps++; s++;
- }
- sprev = s - tlen;
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(CCLASS)
- DATA_ENSURE(1);
- if (BITSET_AT(p->cclass.bsp, *s) == 0) goto fail;
- s++;
- INC_OP;
- NEXT_OUT;
-
- CASE_OP(CCLASS_MB)
- DATA_ENSURE(1);
- if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
-
- cclass_mb:
- {
- OnigCodePoint code;
- UChar *ss;
- int mb_len;
-
- DATA_ENSURE(1);
- mb_len = enclen(encode, s);
- DATA_ENSURE(mb_len);
- ss = s;
- s += mb_len;
- code = ONIGENC_MBC_TO_CODE(encode, ss, s);
- if (! onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
- }
- INC_OP;
- NEXT_OUT;
-
- CASE_OP(CCLASS_MIX)
- DATA_ENSURE(1);
- if (ONIGENC_IS_MBC_HEAD(encode, s)) {
- goto cclass_mb;
- }
- else {
- if (BITSET_AT(p->cclass_mix.bsp, *s) == 0)
- goto fail;
-
- s++;
- }
- INC_OP;
- NEXT_OUT;
-
- CASE_OP(CCLASS_NOT)
- DATA_ENSURE(1);
- if (BITSET_AT(p->cclass.bsp, *s) != 0) goto fail;
- s += enclen(encode, s);
- INC_OP;
- NEXT_OUT;
-
- CASE_OP(CCLASS_MB_NOT)
- DATA_ENSURE(1);
- if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
- s++;
- goto cc_mb_not_success;
- }
-
- cclass_mb_not:
- {
- OnigCodePoint code;
- UChar *ss;
- int mb_len = enclen(encode, s);
-
- if (! DATA_ENSURE_CHECK(mb_len)) {
- DATA_ENSURE(1);
- s = (UChar* )end;
- goto cc_mb_not_success;
- }
-
- ss = s;
- s += mb_len;
- code = ONIGENC_MBC_TO_CODE(encode, ss, s);
- if (onig_is_in_code_range(p->cclass_mb.mb, code)) goto fail;
- }
-
- cc_mb_not_success:
- INC_OP;
- NEXT_OUT;
-
- CASE_OP(CCLASS_MIX_NOT)
- DATA_ENSURE(1);
- if (ONIGENC_IS_MBC_HEAD(encode, s)) {
- goto cclass_mb_not;
- }
- else {
- if (BITSET_AT(p->cclass_mix.bsp, *s) != 0)
- goto fail;
-
- s++;
- }
- INC_OP;
- NEXT_OUT;
-
- CASE_OP(ANYCHAR)
- DATA_ENSURE(1);
- n = enclen(encode, s);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
- s += n;
- INC_OP;
- NEXT_OUT;
-
- CASE_OP(ANYCHAR_ML)
- DATA_ENSURE(1);
- n = enclen(encode, s);
- DATA_ENSURE(n);
- s += n;
- INC_OP;
- NEXT_OUT;
-
- CASE_OP(ANYCHAR_STAR)
- INC_OP;
- while (DATA_ENSURE_CHECK1) {
- STACK_PUSH_ALT(p, s, sprev);
- n = enclen(encode, s);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
- sprev = s;
- s += n;
- }
- JUMP_OUT;
-
- CASE_OP(ANYCHAR_ML_STAR)
- INC_OP;
- while (DATA_ENSURE_CHECK1) {
- STACK_PUSH_ALT(p, s, sprev);
- n = enclen(encode, s);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- sprev = s;
- s++;
- }
- }
- JUMP_OUT;
-
- CASE_OP(ANYCHAR_STAR_PEEK_NEXT)
- {
- UChar c;
-
- c = p->anychar_star_peek_next.c;
- INC_OP;
- while (DATA_ENSURE_CHECK1) {
- if (c == *s) {
- STACK_PUSH_ALT(p, s, sprev);
- }
- n = enclen(encode, s);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
- sprev = s;
- s += n;
- }
- }
- NEXT_OUT;
-
- CASE_OP(ANYCHAR_ML_STAR_PEEK_NEXT)
- {
- UChar c;
-
- c = p->anychar_star_peek_next.c;
- INC_OP;
- while (DATA_ENSURE_CHECK1) {
- if (c == *s) {
- STACK_PUSH_ALT(p, s, sprev);
- }
- n = enclen(encode, s);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- sprev = s;
- s++;
- }
- }
- }
- NEXT_OUT;
-
- CASE_OP(WORD)
- DATA_ENSURE(1);
- if (! ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
-
- s += enclen(encode, s);
- INC_OP;
- NEXT_OUT;
-
- CASE_OP(WORD_ASCII)
- DATA_ENSURE(1);
- if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
- goto fail;
-
- s += enclen(encode, s);
- INC_OP;
- NEXT_OUT;
-
- CASE_OP(NO_WORD)
- DATA_ENSURE(1);
- if (ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
-
- s += enclen(encode, s);
- INC_OP;
- NEXT_OUT;
-
- CASE_OP(NO_WORD_ASCII)
- DATA_ENSURE(1);
- if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end))
- goto fail;
-
- s += enclen(encode, s);
- INC_OP;
- NEXT_OUT;
-
- CASE_OP(WORD_BOUNDARY)
- {
- ModeType mode;
-
- mode = p->word_boundary.mode;
- if (ON_STR_BEGIN(s)) {
- DATA_ENSURE(1);
- if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
- goto fail;
- }
- else if (ON_STR_END(s)) {
- if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
- goto fail;
- }
- else {
- if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
- == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
- goto fail;
- }
- }
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(NO_WORD_BOUNDARY)
- {
- ModeType mode;
-
- mode = p->word_boundary.mode;
- if (ON_STR_BEGIN(s)) {
- if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
- goto fail;
- }
- else if (ON_STR_END(s)) {
- if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
- goto fail;
- }
- else {
- if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
- != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
- goto fail;
- }
- }
- INC_OP;
- JUMP_OUT;
-
-#ifdef USE_WORD_BEGIN_END
- CASE_OP(WORD_BEGIN)
- {
- ModeType mode;
-
- mode = p->word_boundary.mode;
- if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
- if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
- INC_OP;
- JUMP_OUT;
- }
- }
- }
- goto fail;
-
- CASE_OP(WORD_END)
- {
- ModeType mode;
-
- mode = p->word_boundary.mode;
- if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
- if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
- INC_OP;
- JUMP_OUT;
- }
- }
- }
- goto fail;
-#endif
-
- CASE_OP(TEXT_SEGMENT_BOUNDARY)
- {
- int is_break;
-
- switch (p->text_segment_boundary.type) {
- case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
- is_break = onigenc_egcb_is_break_position(encode, s, sprev, str, end);
- break;
-#ifdef USE_UNICODE_WORD_BREAK
- case WORD_BOUNDARY:
- is_break = onigenc_wb_is_break_position(encode, s, sprev, str, end);
- break;
-#endif
- default:
- goto bytecode_error;
- break;
- }
-
- if (p->text_segment_boundary.not != 0)
- is_break = ! is_break;
-
- if (is_break != 0) {
- INC_OP;
- JUMP_OUT;
- }
- else {
- goto fail;
- }
- }
-
- CASE_OP(BEGIN_BUF)
- if (! ON_STR_BEGIN(s)) goto fail;
-
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(END_BUF)
- if (! ON_STR_END(s)) goto fail;
-
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(BEGIN_LINE)
- if (ON_STR_BEGIN(s)) {
- if (IS_NOTBOL(msa->options)) goto fail;
- INC_OP;
- JUMP_OUT;
- }
- else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
- INC_OP;
- JUMP_OUT;
- }
- goto fail;
-
- CASE_OP(END_LINE)
- if (ON_STR_END(s)) {
-#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
-#endif
- if (IS_NOTEOL(msa->options)) goto fail;
- INC_OP;
- JUMP_OUT;
-#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- }
-#endif
- }
- else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
- INC_OP;
- JUMP_OUT;
- }
-#ifdef USE_CRNL_AS_LINE_TERMINATOR
- else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
- INC_OP;
- JUMP_OUT;
- }
-#endif
- goto fail;
-
- CASE_OP(SEMI_END_BUF)
- if (ON_STR_END(s)) {
-#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
-#endif
- if (IS_NOTEOL(msa->options)) goto fail;
- INC_OP;
- JUMP_OUT;
-#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- }
-#endif
- }
- else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
- ON_STR_END(s + enclen(encode, s))) {
- INC_OP;
- JUMP_OUT;
- }
-#ifdef USE_CRNL_AS_LINE_TERMINATOR
- else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
- UChar* ss = s + enclen(encode, s);
- ss += enclen(encode, ss);
- if (ON_STR_END(ss)) {
- INC_OP;
- JUMP_OUT;
- }
- }
-#endif
- goto fail;
-
- CASE_OP(BEGIN_POSITION)
- if (s != msa->start)
- goto fail;
-
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(MEMORY_START_PUSH)
- mem = p->memory_start.num;
- STACK_PUSH_MEM_START(mem, s);
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(MEMORY_START)
- mem = p->memory_start.num;
- mem_start_stk[mem] = (StackIndex )((void* )s);
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(MEMORY_END_PUSH)
- mem = p->memory_end.num;
- STACK_PUSH_MEM_END(mem, s);
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(MEMORY_END)
- mem = p->memory_end.num;
- mem_end_stk[mem] = (StackIndex )((void* )s);
- INC_OP;
- JUMP_OUT;
-
-#ifdef USE_CALL
- CASE_OP(MEMORY_END_PUSH_REC)
- mem = p->memory_end.num;
- STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
- si = GET_STACK_INDEX(stkp);
- STACK_PUSH_MEM_END(mem, s);
- mem_start_stk[mem] = si;
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(MEMORY_END_REC)
- mem = p->memory_end.num;
- mem_end_stk[mem] = (StackIndex )((void* )s);
- STACK_GET_MEM_START(mem, stkp);
-
- if (MEM_STATUS_AT(reg->bt_mem_start, mem))
- mem_start_stk[mem] = GET_STACK_INDEX(stkp);
- else
- mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
-
- STACK_PUSH_MEM_END_MARK(mem);
- INC_OP;
- JUMP_OUT;
-#endif
-
- CASE_OP(BACKREF1)
- mem = 1;
- goto backref;
-
- CASE_OP(BACKREF2)
- mem = 2;
- goto backref;
-
- CASE_OP(BACKREF_N)
- mem = p->backref_n.n1;
- backref:
- {
- int len;
- UChar *pstart, *pend;
-
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
-
- if (MEM_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = (int )(pend - pstart);
- if (n != 0) {
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP(s, pstart, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
- }
- }
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(BACKREF_N_IC)
- mem = p->backref_n.n1;
- {
- int len;
- UChar *pstart, *pend;
-
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
-
- if (MEM_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = (int )(pend - pstart);
- if (n != 0) {
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP_IC(case_fold_flag, pstart, &s, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
- }
- }
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(BACKREF_MULTI)
- {
- int len, is_fail;
- UChar *pstart, *pend, *swork;
-
- tlen = p->backref_general.num;
- for (i = 0; i < tlen; i++) {
- mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
-
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
-
- if (MEM_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = (int )(pend - pstart);
- if (n != 0) {
- DATA_ENSURE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE(swork, pstart, n, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
- }
- break; /* success */
- }
- if (i == tlen) goto fail;
- }
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(BACKREF_MULTI_IC)
- {
- int len, is_fail;
- UChar *pstart, *pend, *swork;
-
- tlen = p->backref_general.num;
- for (i = 0; i < tlen; i++) {
- mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
-
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
-
- if (MEM_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = (int )(pend - pstart);
- if (n != 0) {
- DATA_ENSURE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
- }
- break; /* success */
- }
- if (i == tlen) goto fail;
- }
- INC_OP;
- JUMP_OUT;
-
-#ifdef USE_BACKREF_WITH_LEVEL
- CASE_OP(BACKREF_WITH_LEVEL_IC)
- n = 1; /* ignore case */
- goto backref_with_level;
- CASE_OP(BACKREF_WITH_LEVEL)
- {
- int len;
- int level;
- MemNumType* mems;
- UChar* ssave;
-
- n = 0;
- backref_with_level:
- level = p->backref_general.nest_level;
- tlen = p->backref_general.num;
- mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
-
- ssave = s;
- if (backref_match_at_nested_level(reg, stk, stk_base, n,
- case_fold_flag, level, (int )tlen, mems, &s, end)) {
- if (ssave != s) {
- sprev = ssave;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
- }
- }
- else
- goto fail;
- }
- INC_OP;
- JUMP_OUT;
-#endif
-
- CASE_OP(BACKREF_CHECK)
- {
- MemNumType* mems;
-
- tlen = p->backref_general.num;
- mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
-
- for (i = 0; i < tlen; i++) {
- mem = mems[i];
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
- break; /* success */
- }
- if (i == tlen) goto fail;
- }
- INC_OP;
- JUMP_OUT;
-
-#ifdef USE_BACKREF_WITH_LEVEL
- CASE_OP(BACKREF_CHECK_WITH_LEVEL)
- {
- LengthType level;
- MemNumType* mems;
-
- level = p->backref_general.nest_level;
- tlen = p->backref_general.num;
- mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
-
- if (backref_check_at_nested_level(reg, stk, stk_base,
- (int )level, (int )tlen, mems) == 0)
- goto fail;
- }
- INC_OP;
- JUMP_OUT;
-#endif
-
- CASE_OP(EMPTY_CHECK_START)
- mem = p->empty_check_start.mem; /* mem: null check id */
- STACK_PUSH_EMPTY_CHECK_START(mem, s);
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(EMPTY_CHECK_END)
- {
- int is_empty;
-
- mem = p->empty_check_end.mem; /* mem: null check id */
- STACK_EMPTY_CHECK(is_empty, mem, s);
- INC_OP;
- if (is_empty) {
-#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem, s);
-#endif
- empty_check_found:
- /* empty loop founded, skip next instruction */
-#if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
- switch (p->opcode) {
- case OP_JUMP:
- case OP_PUSH:
- case OP_REPEAT_INC:
- case OP_REPEAT_INC_NG:
- case OP_REPEAT_INC_SG:
- case OP_REPEAT_INC_NG_SG:
- INC_OP;
- break;
- default:
- goto unexpected_bytecode_error;
- break;
- }
-#else
- INC_OP;
-#endif
- }
- }
- JUMP_OUT;
-
-#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
- CASE_OP(EMPTY_CHECK_END_MEMST)
- {
- int is_empty;
-
- mem = p->empty_check_end.mem; /* mem: null check id */
- STACK_EMPTY_CHECK_MEM(is_empty, mem, s, reg);
- INC_OP;
- if (is_empty) {
-#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem, s);
-#endif
- if (is_empty == -1) goto fail;
- goto empty_check_found;
- }
- }
- JUMP_OUT;
-#endif
-
-#ifdef USE_CALL
- CASE_OP(EMPTY_CHECK_END_MEMST_PUSH)
- {
- int is_empty;
-
- mem = p->empty_check_end.mem; /* mem: null check id */
-#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
- STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, s, reg);
-#else
- STACK_EMPTY_CHECK_REC(is_empty, mem, s);
-#endif
- INC_OP;
- if (is_empty) {
-#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n",
- (int )mem, s);
-#endif
- if (is_empty == -1) goto fail;
- goto empty_check_found;
- }
- else {
- STACK_PUSH_EMPTY_CHECK_END(mem);
- }
- }
- JUMP_OUT;
-#endif
-
- CASE_OP(JUMP)
- addr = p->jump.addr;
- p += addr;
- CHECK_INTERRUPT_JUMP_OUT;
-
- CASE_OP(PUSH)
- addr = p->push.addr;
- STACK_PUSH_ALT(p + addr, s, sprev);
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(PUSH_SUPER)
- addr = p->push.addr;
- STACK_PUSH_SUPER_ALT(p + addr, s, sprev);
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(POP_OUT)
- STACK_POP_ONE;
- /* for stop backtrack */
- /* CHECK_RETRY_LIMIT_IN_MATCH; */
- INC_OP;
- JUMP_OUT;
-
- #ifdef USE_OP_PUSH_OR_JUMP_EXACT
- CASE_OP(PUSH_OR_JUMP_EXACT1)
- {
- UChar c;
-
- addr = p->push_or_jump_exact1.addr;
- c = p->push_or_jump_exact1.c;
- if (DATA_ENSURE_CHECK1 && c == *s) {
- STACK_PUSH_ALT(p + addr, s, sprev);
- INC_OP;
- JUMP_OUT;
- }
- }
- p += addr;
- JUMP_OUT;
-#endif
-
- CASE_OP(PUSH_IF_PEEK_NEXT)
- {
- UChar c;
-
- addr = p->push_if_peek_next.addr;
- c = p->push_if_peek_next.c;
- if (c == *s) {
- STACK_PUSH_ALT(p + addr, s, sprev);
- INC_OP;
- JUMP_OUT;
- }
- }
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(REPEAT)
- mem = p->repeat.id; /* mem: OP_REPEAT ID */
- addr = p->repeat.addr;
-
- STACK_ENSURE(1);
- repeat_stk[mem] = GET_STACK_INDEX(stk);
- STACK_PUSH_REPEAT(mem, p + 1);
-
- if (reg->repeat_range[mem].lower == 0) {
- STACK_PUSH_ALT(p + addr, s, sprev);
- }
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(REPEAT_NG)
- mem = p->repeat.id; /* mem: OP_REPEAT ID */
- addr = p->repeat.addr;
-
- STACK_ENSURE(1);
- repeat_stk[mem] = GET_STACK_INDEX(stk);
- STACK_PUSH_REPEAT(mem, p + 1);
-
- if (reg->repeat_range[mem].lower == 0) {
- STACK_PUSH_ALT(p + 1, s, sprev);
- p += addr;
- }
- else
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(REPEAT_INC)
- mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
-
- repeat_inc:
- stkp->u.repeat.count++;
- if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
- /* end of repeat. Nothing to do. */
- INC_OP;
- }
- else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
- INC_OP;
- STACK_PUSH_ALT(p, s, sprev);
- p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
- }
- else {
- p = stkp->u.repeat.pcode;
- }
- STACK_PUSH_REPEAT_INC(si);
- CHECK_INTERRUPT_JUMP_OUT;
-
- CASE_OP(REPEAT_INC_SG)
- mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
- STACK_GET_REPEAT(mem, stkp);
- si = GET_STACK_INDEX(stkp);
- goto repeat_inc;
-
- CASE_OP(REPEAT_INC_NG)
- mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
-
- repeat_inc_ng:
- stkp->u.repeat.count++;
- if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
- if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
- Operation* pcode = stkp->u.repeat.pcode;
-
- STACK_PUSH_REPEAT_INC(si);
- STACK_PUSH_ALT(pcode, s, sprev);
- INC_OP;
- }
- else {
- p = stkp->u.repeat.pcode;
- STACK_PUSH_REPEAT_INC(si);
- }
- }
- else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
- STACK_PUSH_REPEAT_INC(si);
- INC_OP;
- }
- CHECK_INTERRUPT_JUMP_OUT;
-
- CASE_OP(REPEAT_INC_NG_SG)
- mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
- STACK_GET_REPEAT(mem, stkp);
- si = GET_STACK_INDEX(stkp);
- goto repeat_inc_ng;
-
- CASE_OP(PREC_READ_START)
- STACK_PUSH_PREC_READ_START(s, sprev);
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(PREC_READ_END)
- STACK_GET_PREC_READ_START(stkp);
- s = stkp->u.state.pstr;
- sprev = stkp->u.state.pstr_prev;
- STACK_PUSH(STK_PREC_READ_END,0,0,0);
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(PREC_READ_NOT_START)
- addr = p->prec_read_not_start.addr;
- STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev);
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(PREC_READ_NOT_END)
- STACK_POP_TIL_ALT_PREC_READ_NOT;
- goto fail;
-
- CASE_OP(ATOMIC_START)
- STACK_PUSH_TO_VOID_START;
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(ATOMIC_END)
- STACK_EXEC_TO_VOID(stkp);
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(LOOK_BEHIND)
- tlen = p->look_behind.len;
- s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
- if (IS_NULL(s)) goto fail;
- sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(LOOK_BEHIND_NOT_START)
- addr = p->look_behind_not_start.addr;
- tlen = p->look_behind_not_start.len;
- q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
- if (IS_NULL(q)) {
- /* too short case -> success. ex. /(?<!XXX)a/.match("a")
- If you want to change to fail, replace following line. */
- p += addr;
- /* goto fail; */
- }
- else {
- STACK_PUSH_ALT_LOOK_BEHIND_NOT(p + addr, s, sprev);
- s = q;
- sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
- INC_OP;
- }
- JUMP_OUT;
-
- CASE_OP(LOOK_BEHIND_NOT_END)
- STACK_POP_TIL_ALT_LOOK_BEHIND_NOT;
- INC_OP;
- goto fail;
-
-#ifdef USE_CALL
- CASE_OP(CALL)
- addr = p->call.addr;
- INC_OP; STACK_PUSH_CALL_FRAME(p);
- p = reg->ops + addr;
- JUMP_OUT;
-
- CASE_OP(RETURN)
- STACK_RETURN(p);
- STACK_PUSH_RETURN;
- JUMP_OUT;
-#endif
-
- CASE_OP(PUSH_SAVE_VAL)
- {
- SaveType type;
-
- type = p->push_save_val.type;
- mem = p->push_save_val.id; /* mem: save id */
- switch ((enum SaveType )type) {
- case SAVE_KEEP:
- STACK_PUSH_SAVE_VAL(mem, type, s);
- break;
-
- case SAVE_S:
- STACK_PUSH_SAVE_VAL_WITH_SPREV(mem, type, s);
- break;
-
- case SAVE_RIGHT_RANGE:
- STACK_PUSH_SAVE_VAL(mem, SAVE_RIGHT_RANGE, right_range);
- break;
- }
- }
- INC_OP;
- JUMP_OUT;
-
- CASE_OP(UPDATE_VAR)
- {
- UpdateVarType type;
- enum SaveType save_type;
-
- type = p->update_var.type;
- mem = p->update_var.id; /* mem: save id */
-
- switch ((enum UpdateVarType )type) {
- case UPDATE_VAR_KEEP_FROM_STACK_LAST:
- STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep);
- break;
- case UPDATE_VAR_S_FROM_STACK:
- STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S, mem, s);
- break;
- case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK:
- save_type = SAVE_S;
- goto get_save_val_type_last_id;
- break;
- case UPDATE_VAR_RIGHT_RANGE_FROM_STACK:
- save_type = SAVE_RIGHT_RANGE;
- get_save_val_type_last_id:
- STACK_GET_SAVE_VAL_TYPE_LAST_ID(save_type, mem, right_range);
- break;
- case UPDATE_VAR_RIGHT_RANGE_INIT:
- INIT_RIGHT_RANGE;
- break;
- }
- }
- INC_OP;
- JUMP_OUT;
-
-#ifdef USE_CALLOUT
- CASE_OP(CALLOUT_CONTENTS)
- of = ONIG_CALLOUT_OF_CONTENTS;
- mem = p->callout_contents.num;
- goto callout_common_entry;
- BREAK_OUT;
-
- CASE_OP(CALLOUT_NAME)
- {
- int call_result;
- int name_id;
- int in;
- CalloutListEntry* e;
- OnigCalloutFunc func;
- OnigCalloutArgs args;
-
- of = ONIG_CALLOUT_OF_NAME;
- name_id = p->callout_name.id;
- mem = p->callout_name.num;
-
- callout_common_entry:
- e = onig_reg_callout_list_at(reg, mem);
- in = e->in;
- if (of == ONIG_CALLOUT_OF_NAME) {
- func = onig_get_callout_start_func(reg, mem);
- }
- else {
- name_id = ONIG_NON_NAME_ID;
- func = msa->mp->progress_callout_of_contents;
- }
-
- if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) {
- CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id,
- (int )mem, msa->mp->callout_user_data, args, call_result);
- switch (call_result) {
- case ONIG_CALLOUT_FAIL:
- goto fail;
- break;
- case ONIG_CALLOUT_SUCCESS:
- goto retraction_callout2;
- break;
- default: /* error code */
- if (call_result > 0) {
- call_result = ONIGERR_INVALID_ARGUMENT;
- }
- best_len = call_result;
- goto finish;
- break;
- }
- }
- else {
- retraction_callout2:
- if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) {
- if (of == ONIG_CALLOUT_OF_NAME) {
- if (IS_NOT_NULL(func)) {
- STACK_PUSH_CALLOUT_NAME(name_id, mem, func);
- }
- }
- else {
- func = msa->mp->retraction_callout_of_contents;
- if (IS_NOT_NULL(func)) {
- STACK_PUSH_CALLOUT_CONTENTS(mem, func);
- }
- }
- }
- }
- }
- INC_OP;
- JUMP_OUT;
-#endif
-
- CASE_OP(FINISH)
- goto finish;
-
-#ifdef ONIG_DEBUG_STATISTICS
- fail:
- SOP_OUT;
- goto fail2;
-#endif
- CASE_OP(FAIL)
-#ifdef ONIG_DEBUG_STATISTICS
- fail2:
-#else
- fail:
-#endif
- STACK_POP;
- p = stk->u.state.pcode;
- s = stk->u.state.pstr;
- sprev = stk->u.state.pstr_prev;
- CHECK_RETRY_LIMIT_IN_MATCH;
- JUMP_OUT;
-
- DEFAULT_OP
- goto bytecode_error;
-
- } BYTECODE_INTERPRETER_END;
-
- finish:
- STACK_SAVE;
- return best_len;
-
-#ifdef ONIG_DEBUG
- stack_error:
- STACK_SAVE;
- return ONIGERR_STACK_BUG;
-#endif
-
- bytecode_error:
- STACK_SAVE;
- return ONIGERR_UNDEFINED_BYTECODE;
-
-#if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
- unexpected_bytecode_error:
- STACK_SAVE;
- return ONIGERR_UNEXPECTED_BYTECODE;
-#endif
-
-#ifdef USE_RETRY_LIMIT_IN_MATCH
- retry_limit_in_match_over:
- STACK_SAVE;
- return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER;
-#endif
-}
-
-
-static UChar*
-slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
- const UChar* text, const UChar* text_end, UChar* text_range)
-{
- UChar *t, *p, *s, *end;
-
- end = (UChar* )text_end;
- end -= target_end - target - 1;
- if (end > text_range)
- end = text_range;
-
- s = (UChar* )text;
-
- while (s < end) {
- if (*s == *target) {
- p = s + 1;
- t = target + 1;
- while (t < target_end) {
- if (*t != *p++)
- break;
- t++;
- }
- if (t == target_end)
- return s;
- }
- s += enclen(enc, s);
- }
-
- return (UChar* )NULL;
-}
-
-static int
-str_lower_case_match(OnigEncoding enc, int case_fold_flag,
- const UChar* t, const UChar* tend,
- const UChar* p, const UChar* end)
-{
- int lowlen;
- UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
-
- while (t < tend) {
- lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
- q = lowbuf;
- while (lowlen > 0) {
- if (*t++ != *q++) return 0;
- lowlen--;
- }
- }
-
- return 1;
-}
-
-static UChar*
-slow_search_ic(OnigEncoding enc, int case_fold_flag,
- UChar* target, UChar* target_end,
- const UChar* text, const UChar* text_end, UChar* text_range)
-{
- UChar *s, *end;
-
- end = (UChar* )text_end;
- end -= target_end - target - 1;
- if (end > text_range)
- end = text_range;
-
- s = (UChar* )text;
-
- while (s < end) {
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, text_end))
- return s;
-
- s += enclen(enc, s);
- }
-
- return (UChar* )NULL;
-}
-
-static UChar*
-slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
- const UChar* text, const UChar* adjust_text,
- const UChar* text_end, const UChar* text_start)
-{
- UChar *t, *p, *s;
-
- s = (UChar* )text_end;
- s -= (target_end - target);
- if (s > text_start)
- s = (UChar* )text_start;
- else
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
-
- while (s >= text) {
- //if text is not null,the logic is correct.
- //this function is only invoked by backward_search_range,parameter text come
- //from range, which is checked by "if (range == 0) goto fail" in line 4512
- //so the check is just for passing static analysis.
- if(IS_NULL(s))break;
- if (*s == *target) {
- p = s + 1;
- t = target + 1;
- while (t < target_end) {
- if (*t != *p++)
- break;
- t++;
- }
- if (t == target_end)
- return s;
- }
- s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
- }
-
- return (UChar* )NULL;
-}
-
-static UChar*
-slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
- UChar* target, UChar* target_end,
- const UChar* text, const UChar* adjust_text,
- const UChar* text_end, const UChar* text_start)
-{
- UChar *s;
-
- s = (UChar* )text_end;
- s -= (target_end - target);
- if (s > text_start)
- s = (UChar* )text_start;
- else
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
-
- while (s >= text) {
- if (str_lower_case_match(enc, case_fold_flag,
- target, target_end, s, text_end))
- return s;
-
- s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s);
- }
-
- return (UChar* )NULL;
-}
-
-
-static UChar*
-sunday_quick_search_step_forward(regex_t* reg,
- const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end,
- const UChar* text_range)
-{
- const UChar *s, *se, *t, *p, *end;
- const UChar *tail;
- int skip, tlen1;
- int map_offset;
- OnigEncoding enc;
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr,
- "sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range);
-#endif
-
- enc = reg->enc;
-
- tail = target_end - 1;
- tlen1 = (int )(tail - target);
- end = text_range;
- if (end + tlen1 > text_end)
- end = text_end - tlen1;
-
- map_offset = reg->map_offset;
- s = text;
-
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- if (se + map_offset >= text_end) break;
- skip = reg->map[*(se + map_offset)];
-#if 0
- t = s;
- do {
- s += enclen(enc, s);
- } while ((s - t) < skip && s < end);
-#else
- s += skip;
- if (s < end)
- s = onigenc_get_right_adjust_char_head(enc, text, s);
-#endif
- }
-
- return (UChar* )NULL;
-}
-
-static UChar*
-sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end,
- const UChar* text_range)
-{
- const UChar *s, *t, *p, *end;
- const UChar *tail;
- int map_offset;
-
- end = text_range + (target_end - target);
- if (end > text_end)
- end = text_end;
-
- map_offset = reg->map_offset;
- tail = target_end - 1;
- s = text + (tail - target);
-
- while (s < end) {
- p = s;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- if (s + map_offset >= text_end) break;
- s += reg->map[*(s + map_offset)];
- }
-
- return (UChar* )NULL;
-}
-
-static UChar*
-sunday_quick_search_case_fold(regex_t* reg,
- const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end,
- const UChar* text_range)
-{
- const UChar *s, *se, *end;
- const UChar *tail;
- int skip, tlen1;
- int map_offset;
- int case_fold_flag;
- OnigEncoding enc;
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr,
- "sunday_quick_search_case_fold: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range);
-#endif
-
- enc = reg->enc;
- case_fold_flag = reg->case_fold_flag;
-
- tail = target_end - 1;
- tlen1 = (int )(tail - target);
- end = text_range;
- if (end + tlen1 > text_end)
- end = text_end - tlen1;
-
- map_offset = reg->map_offset;
- s = text;
-
- while (s < end) {
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, text_end))
- return (UChar* )s;
-
- se = s + tlen1;
- if (se + map_offset >= text_end) break;
- skip = reg->map[*(se + map_offset)];
-#if 0
- p = s;
- do {
- s += enclen(enc, s);
- } while ((s - p) < skip && s < end);
-#else
- /* This is faster than prev code for long text. ex: /(?i)Twain/ */
- s += skip;
- if (s < end)
- s = onigenc_get_right_adjust_char_head(enc, text, s);
-#endif
- }
-
- return (UChar* )NULL;
-}
-
-static UChar*
-map_search(OnigEncoding enc, UChar map[],
- const UChar* text, const UChar* text_range)
-{
- const UChar *s = text;
-
- while (s < text_range) {
- if (map[*s]) return (UChar* )s;
-
- s += enclen(enc, s);
- }
- return (UChar* )NULL;
-}
-
-static UChar*
-map_search_backward(OnigEncoding enc, UChar map[],
- const UChar* text, const UChar* adjust_text,
- const UChar* text_start)
-{
- const UChar *s = text_start;
-
- while (s >= text) {
- //if text is not null,the logic is correct.
- //this function is only invoked by backward_search_range,parameter text come
- //from range, which is checked by "if (range == 0) goto fail" in line 4512
- //so the check is just for passing static analysis.
- if(IS_NULL(s))break;
- if (map[*s]) return (UChar* )s;
-
- s = onigenc_get_prev_char_head(enc, adjust_text, s);
- }
- return (UChar* )NULL;
-}
-extern int
-onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
- OnigRegion* region, OnigOptionType option)
-{
- int r;
- OnigMatchParam mp;
-
- onig_initialize_match_param(&mp);
- r = onig_match_with_param(reg, str, end, at, region, option, &mp);
- onig_free_match_param_content(&mp);
- return r;
-}
-
-extern int
-onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
- const UChar* at, OnigRegion* region, OnigOptionType option,
- OnigMatchParam* mp)
-{
- int r;
- UChar *prev;
- MatchArg msa;
-
- ADJUST_MATCH_PARAM(reg, mp);
- MATCH_ARG_INIT(msa, reg, option, region, at, mp);
- if (region
-#ifdef USE_POSIX_API_REGION_OPTION
- && !IS_POSIX_REGION(option)
-#endif
- ) {
- r = onig_region_resize_clear(region, reg->num_mem + 1);
- }
- else
- r = 0;
-
- if (r == 0) {
- if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
- if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
- r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
- goto end;
- }
- }
-
- prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
- r = match_at(reg, str, end, end, at, prev, &msa);
- }
-
- end:
- MATCH_ARG_FREE(msa);
- return r;
-}
-
-static int
-forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
- UChar* range, UChar** low, UChar** high, UChar** low_prev)
-{
- UChar *p, *pprev = (UChar* )NULL;
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "forward_search_range: str: %p, end: %p, s: %p, range: %p\n",
- str, end, s, range);
-#endif
-
- p = s;
- if (reg->dmin > 0) {
- if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
- p += reg->dmin;
- }
- else {
- UChar *q = p + reg->dmin;
-
- if (q >= end) return 0; /* fail */
- while (p < q) p += enclen(reg->enc, p);
- }
- }
-
- retry:
- switch (reg->optimize) {
- case OPTIMIZE_STR:
- p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
- break;
- case OPTIMIZE_STR_CASE_FOLD:
- p = slow_search_ic(reg->enc, reg->case_fold_flag,
- reg->exact, reg->exact_end, p, end, range);
- break;
-
- case OPTIMIZE_STR_CASE_FOLD_FAST:
- p = sunday_quick_search_case_fold(reg, reg->exact, reg->exact_end, p, end,
- range);
- break;
-
- case OPTIMIZE_STR_FAST:
- p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);
- break;
-
- case OPTIMIZE_STR_FAST_STEP_FORWARD:
- p = sunday_quick_search_step_forward(reg, reg->exact, reg->exact_end,
- p, end, range);
- break;
-
- case OPTIMIZE_MAP:
- p = map_search(reg->enc, reg->map, p, range);
- break;
- }
-
- if (p && p < range) {
- if (p - reg->dmin < s) {
- retry_gate:
- pprev = p;
- p += enclen(reg->enc, p);
- goto retry;
- }
-
- if (reg->sub_anchor) {
- UChar* prev;
-
- switch (reg->sub_anchor) {
- case ANCR_BEGIN_LINE:
- if (!ON_STR_BEGIN(p)) {
- prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p);
- if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
- goto retry_gate;
- }
- break;
-
- case ANCR_END_LINE:
- if (ON_STR_END(p)) {
-#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p);
- if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
- goto retry_gate;
-#endif
- }
- else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
-#ifdef USE_CRNL_AS_LINE_TERMINATOR
- && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
-#endif
- )
- goto retry_gate;
- break;
- }
- }
-
- if (reg->dmax == 0) {
- *low = p;
- if (low_prev) {
- if (*low > s)
- *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
- else
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p);
- }
- }
- else {
- if (reg->dmax != INFINITE_LEN) {
- if (p - str < reg->dmax) {
- *low = (UChar* )str;
- if (low_prev)
- *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);
- }
- else {
- *low = p - reg->dmax;
- if (*low > s) {
- *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
- *low, (const UChar** )low_prev);
- if (low_prev && IS_NULL(*low_prev))
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : s), *low);
- }
- else {
- if (low_prev)
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), *low);
- }
- }
- }
- }
- /* no needs to adjust *high, *high is used as range check only */
- *high = p - reg->dmin;
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr,
- "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
- (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
-#endif
- return 1; /* success */
- }
-
- return 0; /* fail */
-}
-
-
-static int
-backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
- UChar* s, const UChar* range, UChar* adjrange,
- UChar** low, UChar** high)
-{
- UChar *p;
-
- if (range == 0) goto fail;
-
- range += reg->dmin;
- p = s;
-
- retry:
- switch (reg->optimize) {
- case OPTIMIZE_STR:
- exact_method:
- p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
- range, adjrange, end, p);
- break;
-
- case OPTIMIZE_STR_CASE_FOLD:
- case OPTIMIZE_STR_CASE_FOLD_FAST:
- p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
- reg->exact, reg->exact_end,
- range, adjrange, end, p);
- break;
-
- case OPTIMIZE_STR_FAST:
- case OPTIMIZE_STR_FAST_STEP_FORWARD:
- goto exact_method;
- break;
-
- case OPTIMIZE_MAP:
- p = map_search_backward(reg->enc, reg->map, range, adjrange, p);
- break;
- }
-
- if (p) {
- if (reg->sub_anchor) {
- UChar* prev;
-
- switch (reg->sub_anchor) {
- case ANCR_BEGIN_LINE:
- if (!ON_STR_BEGIN(p)) {
- prev = onigenc_get_prev_char_head(reg->enc, str, p);
- if (IS_NOT_NULL(prev) && !ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
- p = prev;
- goto retry;
- }
- }
- break;
-
- case ANCR_END_LINE:
- if (ON_STR_END(p)) {
-#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
- if (IS_NULL(prev)) goto fail;
- if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
- p = prev;
- goto retry;
- }
-#endif
- }
- else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
-#ifdef USE_CRNL_AS_LINE_TERMINATOR
- && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
-#endif
- ) {
- p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
- if (IS_NULL(p)) goto fail;
- goto retry;
- }
- break;
- }
- }
-
- /* no needs to adjust *high, *high is used as range check only */
- if (reg->dmax != INFINITE_LEN) {
- *low = p - reg->dmax;
- *high = p - reg->dmin;
- *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
- }
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
- (int )(*low - str), (int )(*high - str));
-#endif
- return 1; /* success */
- }
-
- fail:
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "backward_search_range: fail.\n");
-#endif
- return 0; /* fail */
-}
-
-
-extern int
-onig_search(regex_t* reg, const UChar* str, const UChar* end,
- const UChar* start, const UChar* range, OnigRegion* region,
- OnigOptionType option)
-{
- int r;
- OnigMatchParam mp;
-
- onig_initialize_match_param(&mp);
- r = onig_search_with_param(reg, str, end, start, range, region, option, &mp);
- onig_free_match_param_content(&mp);
- return r;
-
-}
-
-extern int
-onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
- const UChar* start, const UChar* range, OnigRegion* region,
- OnigOptionType option, OnigMatchParam* mp)
-{
- int r;
- UChar *s, *prev;
- MatchArg msa;
- const UChar *orig_start = start;
- const UChar *orig_range = range;
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr,
- "onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n",
- str, (int )(end - str), (int )(start - str), (int )(range - str));
-#endif
-
- ADJUST_MATCH_PARAM(reg, mp);
-
- if (region
-#ifdef USE_POSIX_API_REGION_OPTION
- && !IS_POSIX_REGION(option)
-#endif
- ) {
- r = onig_region_resize_clear(region, reg->num_mem + 1);
- if (r != 0) goto finish_no_msa;
- }
-
- if (start > end || start < str) goto mismatch_no_msa;
-
- if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
- if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
- r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
- goto finish_no_msa;
- }
- }
-
-
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_AND_RETURN_CHECK(upper_range) \
- r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- if (! IS_FIND_LONGEST(reg->options)) {\
- goto match;\
- }\
- }\
- else goto finish; /* error */ \
- }
-#else
-#define MATCH_AND_RETURN_CHECK(upper_range) \
- r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- goto match;\
- }\
- else goto finish; /* error */ \
- }
-#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
-
-
- /* anchor optimize: resume search range */
- if (reg->anchor != 0 && str < end) {
- UChar *min_semi_end, *max_semi_end;
-
- if (reg->anchor & ANCR_BEGIN_POSITION) {
- /* search start-position only */
- begin_position:
- if (range > start)
- range = start + 1;
- else
- range = start;
- }
- else if (reg->anchor & ANCR_BEGIN_BUF) {
- /* search str-position only */
- if (range > start) {
- if (start != str) goto mismatch_no_msa;
- range = str + 1;
- }
- else {
- if (range <= str) {
- start = str;
- range = str;
- }
- else
- goto mismatch_no_msa;
- }
- }
- else if (reg->anchor & ANCR_END_BUF) {
- min_semi_end = max_semi_end = (UChar* )end;
-
- end_buf:
- if ((OnigLen )(max_semi_end - str) < reg->anchor_dmin)
- goto mismatch_no_msa;
-
- if (range > start) {
- if ((OnigLen )(min_semi_end - start) > reg->anchor_dmax) {
- start = min_semi_end - reg->anchor_dmax;
- if (start < end)
- start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
- }
- if ((OnigLen )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
- range = max_semi_end - reg->anchor_dmin + 1;
- }
-
- if (start > range) goto mismatch_no_msa;
- /* If start == range, match with empty at end.
- Backward search is used. */
- }
- else {
- if ((OnigLen )(min_semi_end - range) > reg->anchor_dmax) {
- range = min_semi_end - reg->anchor_dmax;
- }
- if ((OnigLen )(max_semi_end - start) < reg->anchor_dmin) {
- start = max_semi_end - reg->anchor_dmin;
- start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
- }
- if (range > start) goto mismatch_no_msa;
- }
- }
- else if (reg->anchor & ANCR_SEMI_END_BUF) {
- UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
-
- max_semi_end = (UChar* )end;
- // only when str > end, pre_end will be null
- // line 4659 "if (start > end || start < str) goto mismatch_no_msa"
- // will guarantee str alwayls less than end
- // so pre_end won't be null,this check is just for passing staic analysis
- if (IS_NOT_NULL(pre_end) && ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
- min_semi_end = pre_end;
-
-#ifdef USE_CRNL_AS_LINE_TERMINATOR
- pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
- if (IS_NOT_NULL(pre_end) &&
- ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
- min_semi_end = pre_end;
- }
-#endif
- if (min_semi_end > str && start <= min_semi_end) {
- goto end_buf;
- }
- }
- else {
- min_semi_end = (UChar* )end;
- goto end_buf;
- }
- }
- else if ((reg->anchor & ANCR_ANYCHAR_INF_ML)) {
- goto begin_position;
- }
- }
- else if (str == end) { /* empty string */
- static const UChar* address_for_empty_string = (UChar* )"";
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "onig_search: empty string.\n");
-#endif
-
- if (reg->threshold_len == 0) {
- start = end = str = address_for_empty_string;
- s = (UChar* )start;
- prev = (UChar* )NULL;
-
- MATCH_ARG_INIT(msa, reg, option, region, start, mp);
- MATCH_AND_RETURN_CHECK(end);
- goto mismatch;
- }
- goto mismatch_no_msa;
- }
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
- (int )(end - str), (int )(start - str), (int )(range - str));
-#endif
-
- MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp);
-
- s = (UChar* )start;
- if (range > start) { /* forward search */
- if (s > str)
- prev = onigenc_get_prev_char_head(reg->enc, str, s);
- else
- prev = (UChar* )NULL;
-
- if (reg->optimize != OPTIMIZE_NONE) {
- UChar *sch_range, *low, *high, *low_prev;
-
- sch_range = (UChar* )range;
- if (reg->dmax != 0) {
- if (reg->dmax == INFINITE_LEN)
- sch_range = (UChar* )end;
- else {
- sch_range += reg->dmax;
- if (sch_range > end) sch_range = (UChar* )end;
- }
- }
-
- if ((end - start) < reg->threshold_len)
- goto mismatch;
-
- if (reg->dmax != INFINITE_LEN) {
- do {
- if (! forward_search_range(reg, str, end, s, sch_range,
- &low, &high, &low_prev)) goto mismatch;
- if (s < low) {
- s = low;
- prev = low_prev;
- }
- while (s <= high) {
- MATCH_AND_RETURN_CHECK(orig_range);
- prev = s;
- s += enclen(reg->enc, s);
- }
- } while (s < range);
- goto mismatch;
- }
- else { /* check only. */
- if (! forward_search_range(reg, str, end, s, sch_range,
- &low, &high, (UChar** )NULL)) goto mismatch;
-
- if ((reg->anchor & ANCR_ANYCHAR_INF) != 0) {
- do {
- MATCH_AND_RETURN_CHECK(orig_range);
- prev = s;
- s += enclen(reg->enc, s);
-
- if ((reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) {
- while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
- prev = s;
- s += enclen(reg->enc, s);
- }
- }
- } while (s < range);
- goto mismatch;
- }
- }
- }
-
- do {
- MATCH_AND_RETURN_CHECK(orig_range);
- prev = s;
- s += enclen(reg->enc, s);
- } while (s < range);
-
- if (s == range) { /* because empty match with /$/. */
- MATCH_AND_RETURN_CHECK(orig_range);
- }
- }
- else { /* backward search */
- if (range < str) goto mismatch;
-
- if (orig_start < end)
- orig_start += enclen(reg->enc, orig_start); /* is upper range */
-
- if (reg->optimize != OPTIMIZE_NONE) {
- UChar *low, *high, *adjrange, *sch_start;
-
- if (range < end)
- adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
- else
- adjrange = (UChar* )end;
-
- if (reg->dmax != INFINITE_LEN &&
- (end - range) >= reg->threshold_len) {
- do {
- sch_start = s + reg->dmax;
- if (sch_start > end) sch_start = (UChar* )end;
- if (backward_search_range(reg, str, end, sch_start, range, adjrange,
- &low, &high) <= 0)
- goto mismatch;
-
- if (s > high)
- s = high;
-
- while (s >= low) {
- prev = onigenc_get_prev_char_head(reg->enc, str, s);
- MATCH_AND_RETURN_CHECK(orig_start);
- s = prev;
- }
- // if range is not null,the check is not necessary.
- // the range is actually the pointer of the end of the matched string
- // or assigned by "range = str" in line 4708. In RegularExpressionMatch
- // protocol, the matched string is the parameter String. And str in
- // line 4708 is the String,too. and the range is calculated from
- // "Start + onigenc_str_bytelen_null (CHAR16_ENCODING, Start)" in
- // line 146 in RegularExpressionDxe.c. RegularExpressionMatch ensure
- // the String is not null,So in both situation, the range can not be NULL.
- // This check is just for passing static analysis.
- if(IS_NULL(s))break;
- } while (s >= range);
- goto mismatch;
- }
- else { /* check only. */
- if ((end - range) < reg->threshold_len) goto mismatch;
-
- sch_start = s;
- if (reg->dmax != 0) {
- if (reg->dmax == INFINITE_LEN)
- sch_start = (UChar* )end;
- else {
- sch_start += reg->dmax;
- if (sch_start > end) sch_start = (UChar* )end;
- else
- sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
- start, sch_start);
- }
- }
- if (backward_search_range(reg, str, end, sch_start, range, adjrange,
- &low, &high) <= 0) goto mismatch;
- }
- }
-
- do {
- prev = onigenc_get_prev_char_head(reg->enc, str, s);
- MATCH_AND_RETURN_CHECK(orig_start);
- s = prev;
- } while (s >= range);
- }
-
- mismatch:
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
- if (IS_FIND_LONGEST(reg->options)) {
- if (msa.best_len >= 0) {
- s = msa.best_s;
- goto match;
- }
- }
-#endif
- r = ONIG_MISMATCH;
-
- finish:
- MATCH_ARG_FREE(msa);
-
- /* If result is mismatch and no FIND_NOT_EMPTY option,
- then the region is not set in match_at(). */
- if (IS_FIND_NOT_EMPTY(reg->options) && region
-#ifdef USE_POSIX_API_REGION_OPTION
- && !IS_POSIX_REGION(option)
-#endif
- ) {
- onig_region_clear(region);
- }
-
-#ifdef ONIG_DEBUG
- if (r != ONIG_MISMATCH)
- fprintf(stderr, "onig_search: error %d\n", r);
-#endif
- return r;
-
- mismatch_no_msa:
- r = ONIG_MISMATCH;
- finish_no_msa:
-#ifdef ONIG_DEBUG
- if (r != ONIG_MISMATCH)
- fprintf(stderr, "onig_search: error %d\n", r);
-#endif
- return r;
-
- match:
- MATCH_ARG_FREE(msa);
- return (int )(s - str);
-}
-
-extern int
-onig_scan(regex_t* reg, const UChar* str, const UChar* end,
- OnigRegion* region, OnigOptionType option,
- int (*scan_callback)(int, int, OnigRegion*, void*),
- void* callback_arg)
-{
- int r;
- int n;
- int rs;
- const UChar* start;
-
- if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
- if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end))
- return ONIGERR_INVALID_WIDE_CHAR_VALUE;
-
- ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING);
- }
-
- n = 0;
- start = str;
- while (1) {
- r = onig_search(reg, str, end, start, end, region, option);
- if (r >= 0) {
- rs = scan_callback(n, r, region, callback_arg);
- n++;
- if (rs != 0)
- return rs;
-
- if (region->end[0] == start - str) {
- if (start >= end) break;
- start += enclen(reg->enc, start);
- }
- else
- start = str + region->end[0];
-
- if (start > end)
- break;
- }
- else if (r == ONIG_MISMATCH) {
- break;
- }
- else { /* error */
- return r;
- }
- }
-
- return n;
-}
-
-extern OnigEncoding
-onig_get_encoding(regex_t* reg)
-{
- return reg->enc;
-}
-
-extern OnigOptionType
-onig_get_options(regex_t* reg)
-{
- return reg->options;
-}
-
-extern OnigCaseFoldType
-onig_get_case_fold_flag(regex_t* reg)
-{
- return reg->case_fold_flag;
-}
-
-extern OnigSyntaxType*
-onig_get_syntax(regex_t* reg)
-{
- return reg->syntax;
-}
-
-extern int
-onig_number_of_captures(regex_t* reg)
-{
- return reg->num_mem;
-}
-
-extern int
-onig_number_of_capture_histories(regex_t* reg)
-{
-#ifdef USE_CAPTURE_HISTORY
- int i, n;
-
- n = 0;
- for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
- if (MEM_STATUS_AT(reg->capture_history, i) != 0)
- n++;
- }
- return n;
-#else
- return 0;
-#endif
-}
-
-extern void
-onig_copy_encoding(OnigEncoding to, OnigEncoding from)
-{
- *to = *from;
-}
-
-#ifdef USE_DIRECT_THREADED_CODE
-extern int
-onig_init_for_match_at(regex_t* reg)
-{
- return match_at(reg, (const UChar* )NULL, (const UChar* )NULL,
- (const UChar* )NULL, (const UChar* )NULL, (UChar* )NULL,
- (MatchArg* )NULL);
-}
-#endif
-
-
-/* for callout functions */
-
-#ifdef USE_CALLOUT
-
-extern OnigCalloutFunc
-onig_get_progress_callout(void)
-{
- return DefaultProgressCallout;
-}
-
-extern int
-onig_set_progress_callout(OnigCalloutFunc f)
-{
- DefaultProgressCallout = f;
- return ONIG_NORMAL;
-}
-
-extern OnigCalloutFunc
-onig_get_retraction_callout(void)
-{
- return DefaultRetractionCallout;
-}
-
-extern int
-onig_set_retraction_callout(OnigCalloutFunc f)
-{
- DefaultRetractionCallout = f;
- return ONIG_NORMAL;
-}
-
-extern int
-onig_get_callout_num_by_callout_args(OnigCalloutArgs* args)
-{
- return args->num;
-}
-
-extern OnigCalloutIn
-onig_get_callout_in_by_callout_args(OnigCalloutArgs* args)
-{
- return args->in;
-}
-
-extern int
-onig_get_name_id_by_callout_args(OnigCalloutArgs* args)
-{
- return args->name_id;
-}
-
-extern const UChar*
-onig_get_contents_by_callout_args(OnigCalloutArgs* args)
-{
- int num;
- CalloutListEntry* e;
-
- num = args->num;
- e = onig_reg_callout_list_at(args->regex, num);
- if (IS_NULL(e)) return 0;
- if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
- return e->u.content.start;
- }
-
- return 0;
-}
-
-extern const UChar*
-onig_get_contents_end_by_callout_args(OnigCalloutArgs* args)
-{
- int num;
- CalloutListEntry* e;
-
- num = args->num;
- e = onig_reg_callout_list_at(args->regex, num);
- if (IS_NULL(e)) return 0;
- if (e->of == ONIG_CALLOUT_OF_CONTENTS) {
- return e->u.content.end;
- }
-
- return 0;
-}
-
-extern int
-onig_get_args_num_by_callout_args(OnigCalloutArgs* args)
-{
- int num;
- CalloutListEntry* e;
-
- num = args->num;
- e = onig_reg_callout_list_at(args->regex, num);
- if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
- if (e->of == ONIG_CALLOUT_OF_NAME) {
- return e->u.arg.num;
- }
-
- return ONIGERR_INVALID_ARGUMENT;
-}
-
-extern int
-onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args)
-{
- int num;
- CalloutListEntry* e;
-
- num = args->num;
- e = onig_reg_callout_list_at(args->regex, num);
- if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
- if (e->of == ONIG_CALLOUT_OF_NAME) {
- return e->u.arg.passed_num;
- }
-
- return ONIGERR_INVALID_ARGUMENT;
-}
-
-extern int
-onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index,
- OnigType* type, OnigValue* val)
-{
- int num;
- CalloutListEntry* e;
-
- num = args->num;
- e = onig_reg_callout_list_at(args->regex, num);
- if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
- if (e->of == ONIG_CALLOUT_OF_NAME) {
- if (IS_NOT_NULL(type)) *type = e->u.arg.types[index];
- if (IS_NOT_NULL(val)) *val = e->u.arg.vals[index];
- return ONIG_NORMAL;
- }
-
- return ONIGERR_INVALID_ARGUMENT;
-}
-
-extern const UChar*
-onig_get_string_by_callout_args(OnigCalloutArgs* args)
-{
- return args->string;
-}
-
-extern const UChar*
-onig_get_string_end_by_callout_args(OnigCalloutArgs* args)
-{
- return args->string_end;
-}
-
-extern const UChar*
-onig_get_start_by_callout_args(OnigCalloutArgs* args)
-{
- return args->start;
-}
-
-extern const UChar*
-onig_get_right_range_by_callout_args(OnigCalloutArgs* args)
-{
- return args->right_range;
-}
-
-extern const UChar*
-onig_get_current_by_callout_args(OnigCalloutArgs* args)
-{
- return args->current;
-}
-
-extern OnigRegex
-onig_get_regex_by_callout_args(OnigCalloutArgs* args)
-{
- return args->regex;
-}
-
-extern unsigned long
-onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args)
-{
- return args->retry_in_match_counter;
-}
-
-
-extern int
-onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end)
-{
- OnigRegex reg;
- const UChar* str;
- StackType* stk_base;
- int i;
-
- i = mem_num;
- reg = a->regex;
- str = a->string;
- stk_base = a->stk_base;
-
- if (i > 0) {
- if (a->mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (MEM_STATUS_AT(reg->bt_mem_start, i))
- *begin = (int )(STACK_AT(a->mem_start_stk[i])->u.mem.pstr - str);
- else
- *begin = (int )((UChar* )((void* )a->mem_start_stk[i]) - str);
-
- *end = (int )((MEM_STATUS_AT(reg->bt_mem_end, i)
- ? STACK_AT(a->mem_end_stk[i])->u.mem.pstr
- : (UChar* )((void* )a->mem_end_stk[i])) - str);
- }
- else {
- *begin = *end = ONIG_REGION_NOTPOS;
- }
- }
- else if (i == 0) {
-#if 0
- *begin = a->start - str;
- *end = a->current - str;
-#else
- return ONIGERR_INVALID_ARGUMENT;
-#endif
- }
- else
- return ONIGERR_INVALID_ARGUMENT;
-
- return ONIG_NORMAL;
-}
-
-extern int
-onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes)
-{
- int n;
-
- n = (int )(a->stk - a->stk_base);
-
- if (used_num != 0)
- *used_num = n;
-
- if (used_bytes != 0)
- *used_bytes = n * sizeof(StackType);
-
- return ONIG_NORMAL;
-}
-
-
-/* builtin callout functions */
-
-extern int
-onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
-{
- return ONIG_CALLOUT_FAIL;
-}
-
-extern int
-onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
-{
- return ONIG_MISMATCH;
-}
-
-#if 0
-extern int
-onig_builtin_success(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
-{
- return ONIG_CALLOUT_SUCCESS;
-}
-#endif
-
-extern int
-onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
-{
- int r;
- int n;
- OnigValue val;
-
- r = onig_get_arg_by_callout_args(args, 0, 0, &val);
- if (r != ONIG_NORMAL) return r;
-
- n = (int )val.l;
- if (n >= 0) {
- n = ONIGERR_INVALID_CALLOUT_BODY;
- }
- else if (onig_is_error_code_needs_param(n)) {
- n = ONIGERR_INVALID_CALLOUT_BODY;
- }
-
- return n;
-}
-
-extern int
-onig_builtin_count(OnigCalloutArgs* args, void* user_data)
-{
- (void )onig_check_callout_data_and_clear_old_values(args);
-
- return onig_builtin_total_count(args, user_data);
-}
-
-extern int
-onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
-{
- int r;
- int slot;
- OnigType type;
- OnigValue val;
- OnigValue aval;
- OnigCodePoint count_type;
-
- r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
- if (r != ONIG_NORMAL) return r;
-
- count_type = aval.c;
- if (count_type != '>' && count_type != 'X' && count_type != '<')
- return ONIGERR_INVALID_CALLOUT_ARG;
-
- r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0,
- &type, &val);
- if (r < ONIG_NORMAL)
- return r;
- else if (r > ONIG_NORMAL) {
- /* type == void: initial state */
- val.l = 0;
- }
-
- if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
- slot = 2;
- if (count_type == '<')
- val.l++;
- else if (count_type == 'X')
- val.l--;
- }
- else {
- slot = 1;
- if (count_type != '<')
- val.l++;
- }
-
- r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val);
- if (r != ONIG_NORMAL) return r;
-
- /* slot 1: in progress counter, slot 2: in retraction counter */
- r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot,
- &type, &val);
- if (r < ONIG_NORMAL)
- return r;
- else if (r > ONIG_NORMAL) {
- val.l = 0;
- }
-
- val.l++;
- r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
- if (r != ONIG_NORMAL) return r;
-
- return ONIG_CALLOUT_SUCCESS;
-}
-
-extern int
-onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
-{
- int r;
- int slot;
- long max_val;
- OnigCodePoint count_type;
- OnigType type;
- OnigValue val;
- OnigValue aval;
-
- (void )onig_check_callout_data_and_clear_old_values(args);
-
- slot = 0;
- r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
- if (r < ONIG_NORMAL)
- return r;
- else if (r > ONIG_NORMAL) {
- /* type == void: initial state */
- type = ONIG_TYPE_LONG;
- val.l = 0;
- }
-
- r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
- if (r != ONIG_NORMAL) return r;
- if (type == ONIG_TYPE_TAG) {
- r = onig_get_callout_data_by_callout_args(args, aval.tag, 0, &type, &aval);
- if (r < ONIG_NORMAL) return r;
- else if (r > ONIG_NORMAL)
- max_val = 0L;
- else
- max_val = aval.l;
- }
- else { /* LONG */
- max_val = aval.l;
- }
-
- r = onig_get_arg_by_callout_args(args, 1, &type, &aval);
- if (r != ONIG_NORMAL) return r;
-
- count_type = aval.c;
- if (count_type != '>' && count_type != 'X' && count_type != '<')
- return ONIGERR_INVALID_CALLOUT_ARG;
-
- if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
- if (count_type == '<') {
- if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
- val.l++;
- }
- else if (count_type == 'X')
- val.l--;
- }
- else {
- if (count_type != '<') {
- if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
- val.l++;
- }
- }
-
- r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
- if (r != ONIG_NORMAL) return r;
-
- return ONIG_CALLOUT_SUCCESS;
-}
-
-enum OP_CMP {
- OP_EQ,
- OP_NE,
- OP_LT,
- OP_GT,
- OP_LE,
- OP_GE
-};
-
-extern int
-onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
-{
- int r;
- int slot;
- long lv;
- long rv;
- OnigType type;
- OnigValue val;
- regex_t* reg;
- enum OP_CMP op;
-
- reg = args->regex;
-
- r = onig_get_arg_by_callout_args(args, 0, &type, &val);
- if (r != ONIG_NORMAL) return r;
-
- if (type == ONIG_TYPE_TAG) {
- r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
- if (r < ONIG_NORMAL) return r;
- else if (r > ONIG_NORMAL)
- lv = 0L;
- else
- lv = val.l;
- }
- else { /* ONIG_TYPE_LONG */
- lv = val.l;
- }
-
- r = onig_get_arg_by_callout_args(args, 2, &type, &val);
- if (r != ONIG_NORMAL) return r;
-
- if (type == ONIG_TYPE_TAG) {
- r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val);
- if (r < ONIG_NORMAL) return r;
- else if (r > ONIG_NORMAL)
- rv = 0L;
- else
- rv = val.l;
- }
- else { /* ONIG_TYPE_LONG */
- rv = val.l;
- }
-
- slot = 0;
- r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val);
- if (r < ONIG_NORMAL)
- return r;
- else if (r > ONIG_NORMAL) {
- /* type == void: initial state */
- OnigCodePoint c1, c2;
- UChar* p;
-
- r = onig_get_arg_by_callout_args(args, 1, &type, &val);
- if (r != ONIG_NORMAL) return r;
-
- p = val.s.start;
- c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
- p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
- if (p < val.s.end) {
- c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end);
- p += ONIGENC_MBC_ENC_LEN(reg->enc, p);
- if (p != val.s.end) return ONIGERR_INVALID_CALLOUT_ARG;
- }
- else
- c2 = 0;
-
- switch (c1) {
- case '=':
- if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
- op = OP_EQ;
- break;
- case '!':
- if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG;
- op = OP_NE;
- break;
- case '<':
- if (c2 == '=') op = OP_LE;
- else if (c2 == 0) op = OP_LT;
- else return ONIGERR_INVALID_CALLOUT_ARG;
- break;
- case '>':
- if (c2 == '=') op = OP_GE;
- else if (c2 == 0) op = OP_GT;
- else return ONIGERR_INVALID_CALLOUT_ARG;
- break;
- default:
- return ONIGERR_INVALID_CALLOUT_ARG;
- break;
- }
- val.l = (long )op;
- r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
- if (r != ONIG_NORMAL) return r;
- }
- else {
- op = (enum OP_CMP )val.l;
- }
-
- switch (op) {
- case OP_EQ: r = (lv == rv); break;
- case OP_NE: r = (lv != rv); break;
- case OP_LT: r = (lv < rv); break;
- case OP_GT: r = (lv > rv); break;
- case OP_LE: r = (lv <= rv); break;
- case OP_GE: r = (lv >= rv); break;
- }
-
- return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS;
-}
-
-
-//#include <stdio.h>
-
-static FILE* OutFp;
-
-/* name start with "onig_" for macros. */
-static int
-onig_builtin_monitor(OnigCalloutArgs* args, void* user_data)
-{
- int r;
- int num;
- size_t tag_len;
- // const UChar* start;
- // const UChar* right;
- // const UChar* current;
- // const UChar* string;
- // const UChar* strend;
- const UChar* tag_start;
- const UChar* tag_end;
- regex_t* reg;
- OnigCalloutIn in;
- OnigType type;
- OnigValue val;
- char buf[20];
- // FILE* fp;
-
- // fp = OutFp;
-
- r = onig_get_arg_by_callout_args(args, 0, &type, &val);
- if (r != ONIG_NORMAL) return r;
-
- in = onig_get_callout_in_by_callout_args(args);
- if (in == ONIG_CALLOUT_IN_PROGRESS) {
- if (val.c == '<')
- return ONIG_CALLOUT_SUCCESS;
- }
- else {
- if (val.c != 'X' && val.c != '<')
- return ONIG_CALLOUT_SUCCESS;
- }
-
- num = onig_get_callout_num_by_callout_args(args);
- // start = onig_get_start_by_callout_args(args);
- // right = onig_get_right_range_by_callout_args(args);
- // current = onig_get_current_by_callout_args(args);
- // string = onig_get_string_by_callout_args(args);
- // strend = onig_get_string_end_by_callout_args(args);
- reg = onig_get_regex_by_callout_args(args);
- tag_start = onig_get_callout_tag_start(reg, num);
- tag_end = onig_get_callout_tag_end(reg, num);
-
- if (tag_start == 0)
- sprintf_s(buf, sizeof(buf), "#%d", num);
- else {
- /* CAUTION: tag string is not terminated with NULL. */
- int i;
-
- tag_len = tag_end - tag_start;
- if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1;
- for (i = 0; i < tag_len; i++) buf[i] = tag_start[i];
- buf[tag_len] = '\0';
- }
-/*
- fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n",
- buf,
- in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=",
- (int )(current - string),
- (int )(start - string),
- (int )(right - string),
- (int )(strend - string));
- //fflush(fp);
-*/
- return ONIG_CALLOUT_SUCCESS;
-}
-
-extern int
-onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */)
-{
- int id;
- char* name;
- OnigEncoding enc;
- unsigned int ts[4];
- OnigValue opts[4];
-
- if (IS_NOT_NULL(fp))
- OutFp = (FILE* )fp;
- else
- OutFp = stdout;
-
- enc = ONIG_ENCODING_ASCII;
-
- name = "MON";
- ts[0] = ONIG_TYPE_CHAR;
- opts[0].c = '>';
- BC_B_O(name, monitor, 1, ts, 1, opts);
-
- return ONIG_NORMAL;
-}
-
-#endif /* USE_CALLOUT */
diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/reggnu.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/reggnu.c
deleted file mode 100644
index 25bcbc2a2f33..000000000000
--- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/reggnu.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/**********************************************************************
- reggnu.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "regint.h"
-#include "oniggnu.h"
-
-extern void
-re_free_registers(OnigRegion* r)
-{
- /* 0: don't free self */
- onig_region_free(r, 0);
-}
-
-extern int
-re_adjust_startpos(regex_t* reg, const char* string, int size,
- int startpos, int range)
-{
- if (startpos > 0 && ONIGENC_MBC_MAXLEN(reg->enc) != 1 && startpos < size) {
- UChar *p;
- UChar *s = (UChar* )string + startpos;
-
- if (range > 0) {
- p = onigenc_get_right_adjust_char_head(reg->enc, (UChar* )string, s);
- }
- else {
- p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, (UChar* )string, s);
- }
- return (int )(p - (UChar* )string);
- }
-
- return startpos;
-}
-
-extern int
-re_match(regex_t* reg, const char* str, int size, int pos,
- struct re_registers* regs)
-{
- return onig_match(reg, (UChar* )str, (UChar* )(str + size),
- (UChar* )(str + pos), regs, ONIG_OPTION_NONE);
-}
-
-extern int
-re_search(regex_t* bufp, const char* string, int size, int startpos, int range,
- struct re_registers* regs)
-{
- return onig_search(bufp, (UChar* )string, (UChar* )(string + size),
- (UChar* )(string + startpos),
- (UChar* )(string + startpos + range),
- regs, ONIG_OPTION_NONE);
-}
-
-extern int
-re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
-{
- int r;
- OnigErrorInfo einfo;
-
- r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo);
- if (r != ONIG_NORMAL) {
- if (IS_NOT_NULL(ebuf))
- (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
- }
-
- return r;
-}
-
-extern void
-re_free_pattern(regex_t* reg)
-{
- onig_free(reg);
-}
-
-extern int
-re_alloc_pattern(regex_t** reg)
-{
- *reg = (regex_t* )xmalloc(sizeof(regex_t));
- if (IS_NULL(*reg)) return ONIGERR_MEMORY;
-
- return onig_reg_init(*reg, ONIG_OPTION_DEFAULT,
- ONIGENC_CASE_FOLD_DEFAULT,
- OnigEncDefaultCharEncoding,
- OnigDefaultSyntax);
-}
-
-extern void
-re_set_casetable(const char* table)
-{
- onigenc_set_default_caseconv_table((UChar* )table);
-}
-
-extern void
-re_mbcinit(int mb_code)
-{
- OnigEncoding enc;
-
- switch (mb_code) {
- case RE_MBCTYPE_ASCII:
- enc = ONIG_ENCODING_ASCII;
- break;
- }
-
- onig_initialize(&enc, 1);
-
- onigenc_set_default_encoding(enc);
-}
diff --git a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c b/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c
deleted file mode 100644
index 2b121690e1f7..000000000000
--- a/MdeModulePkg/Universal/RegularExpressionDxe/Oniguruma/regparse.c
+++ /dev/null
@@ -1,8461 +0,0 @@
-/**********************************************************************
- regparse.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "regparse.h"
-#include "st.h"
-
-#ifdef DEBUG_NODE_FREE
-#include <stdio.h>
-#endif
-
-#define INIT_TAG_NAMES_ALLOC_NUM 5
-
-#define WARN_BUFSIZE 256
-
-#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
-
-#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
- ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_' /* || c == '!' */)
-#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
- ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_')
-
-
-OnigSyntaxType OnigSyntaxOniguruma = {
- (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
- ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
- ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
- ONIG_SYN_OP_ESC_CONTROL_CHARS |
- ONIG_SYN_OP_ESC_C_CONTROL )
- & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
- , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
- ONIG_SYN_OP2_OPTION_ONIGURUMA |
- ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
- ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
- ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
- ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
- ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
- ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
- ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
- ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
- ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
- ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
- ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
- ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
- ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
- ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
- ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
- ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
- , ( SYN_GNU_REGEX_BV |
- ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
- ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
- ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
- ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
- ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
- ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC |
- ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
- ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
- , ONIG_OPTION_NONE
- ,
- {
- (OnigCodePoint )'\\' /* esc */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
- }
-};
-
-OnigSyntaxType OnigSyntaxRuby = {
- (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
- ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
- ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
- ONIG_SYN_OP_ESC_CONTROL_CHARS |
- ONIG_SYN_OP_ESC_C_CONTROL )
- & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
- , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
- ONIG_SYN_OP2_OPTION_RUBY |
- ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
- ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
- ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
- ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
- ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
- ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
- ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
- ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
- ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
- ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
- ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
- ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
- ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
- , ( SYN_GNU_REGEX_BV |
- ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
- ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
- ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
- ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
- ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
- ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
- ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
- , ONIG_OPTION_NONE
- ,
- {
- (OnigCodePoint )'\\' /* esc */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
- }
-};
-
-OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;
-
-extern void onig_null_warn(const char* s ARG_UNUSED) { }
-
-#ifdef DEFAULT_WARN_FUNCTION
-static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
-#else
-static OnigWarnFunc onig_warn = onig_null_warn;
-#endif
-
-#ifdef DEFAULT_VERB_WARN_FUNCTION
-static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
-#else
-static OnigWarnFunc onig_verb_warn = onig_null_warn;
-#endif
-
-extern void onig_set_warn_func(OnigWarnFunc f)
-{
- onig_warn = f;
-}
-
-extern void onig_set_verb_warn_func(OnigWarnFunc f)
-{
- onig_verb_warn = f;
-}
-
-extern void
-onig_warning(const char* s)
-{
- if (onig_warn == onig_null_warn) return ;
-
- (*onig_warn)(s);
-}
-
-#define DEFAULT_MAX_CAPTURE_NUM 32767
-
-static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;
-
-extern int
-onig_set_capture_num_limit(int num)
-{
- if (num < 0) return -1;
-
- MaxCaptureNum = num;
- return 0;
-}
-
-static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
-
-extern unsigned int
-onig_get_parse_depth_limit(void)
-{
- return ParseDepthLimit;
-}
-
-extern int
-onig_set_parse_depth_limit(unsigned int depth)
-{
- if (depth == 0)
- ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
- else
- ParseDepthLimit = depth;
- return 0;
-}
-
-static int
-bbuf_init(BBuf* buf, int size)
-{
- if (size <= 0) {
- size = 0;
- buf->p = NULL;
- }
- else {
- buf->p = (UChar* )xmalloc(size);
- if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
- }
-
- buf->alloc = size;
- buf->used = 0;
- return 0;
-}
-
-static void
-bbuf_free(BBuf* bbuf)
-{
- if (IS_NOT_NULL(bbuf)) {
- if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
- xfree(bbuf);
- }
-}
-
-static int
-bbuf_clone(BBuf** rto, BBuf* from)
-{
- int r;
- BBuf *to;
-
- *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
- CHECK_NULL_RETURN_MEMERR(to);
- r = BB_INIT(to, from->alloc);
- if (r != 0) {
- xfree(to->p);
- *rto = 0;
- return r;
- }
- to->used = from->used;
- xmemcpy(to->p, from->p, from->used);
- return 0;
-}
-
-static int backref_rel_to_abs(int rel_no, ScanEnv* env)
-{
- if (rel_no > 0) {
- return env->num_mem + rel_no;
- }
- else {
- return env->num_mem + 1 + rel_no;
- }
-}
-
-#define OPTION_ON(v,f) ((v) |= (f))
-#define OPTION_OFF(v,f) ((v) &= ~(f))
-
-#define OPTION_NEGATE(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f))
-
-#define MBCODE_START_POS(enc) \
- (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
-
-#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
- add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
-
-#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
- if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
- r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
- if (r != 0) return r;\
- }\
-} while (0)
-
-
-#define BITSET_IS_EMPTY(bs,empty) do {\
- int i;\
- empty = 1;\
- for (i = 0; i < (int )BITSET_SIZE; i++) {\
- if ((bs)[i] != 0) {\
- empty = 0; break;\
- }\
- }\
-} while (0)
-
-static void
-bitset_set_range(BitSetRef bs, int from, int to)
-{
- int i;
- for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
- BITSET_SET_BIT(bs, i);
- }
-}
-
-#if 0
-static void
-bitset_set_all(BitSetRef bs)
-{
- int i;
- for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
-}
-#endif
-
-static void
-bitset_invert(BitSetRef bs)
-{
- int i;
- for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
-}
-
-static void
-bitset_invert_to(BitSetRef from, BitSetRef to)
-{
- int i;
- for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
-}
-
-static void
-bitset_and(BitSetRef dest, BitSetRef bs)
-{
- int i;
- for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
-}
-
-static void
-bitset_or(BitSetRef dest, BitSetRef bs)
-{
- int i;
- for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
-}
-
-static void
-bitset_copy(BitSetRef dest, BitSetRef bs)
-{
- int i;
- for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
-}
-
-extern int
-onig_strncmp(const UChar* s1, const UChar* s2, int n)
-{
- int x;
-
- while (n-- > 0) {
- x = *s2++ - *s1++;
- if (x) return x;
- }
- return 0;
-}
-
-extern void
-onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
-{
- int len = (int )(end - src);
- if (len > 0) {
- xmemcpy(dest, src, len);
- dest[len] = (UChar )0;
- }
-}
-
-static int
-save_entry(ScanEnv* env, enum SaveType type, int* id)
-{
- int nid = env->save_num;
-
-#if 0
- if (IS_NULL(env->saves)) {
- int n = 10;
- env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n);
- CHECK_NULL_RETURN_MEMERR(env->saves);
- env->save_alloc_num = n;
- }
- else if (env->save_alloc_num <= nid) {
- int n = env->save_alloc_num * 2;
- SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n, sizeof(SaveItem)*env->save_alloc_num);
- CHECK_NULL_RETURN_MEMERR(p);
- env->saves = p;
- env->save_alloc_num = n;
- }
-
- env->saves[nid].type = type;
-#endif
-
- env->save_num++;
- *id = nid;
- return 0;
-}
-
-/* scan pattern methods */
-#define PEND_VALUE 0
-
-#define PFETCH_READY UChar* pfetch_prev
-#define PEND (p < end ? 0 : 1)
-#define PUNFETCH p = pfetch_prev
-#define PINC do { \
- pfetch_prev = p; \
- p += ONIGENC_MBC_ENC_LEN(enc, p); \
-} while (0)
-#define PFETCH(c) do { \
- c = ONIGENC_MBC_TO_CODE(enc, p, end); \
- pfetch_prev = p; \
- p += ONIGENC_MBC_ENC_LEN(enc, p); \
-} while (0)
-
-#define PINC_S do { \
- p += ONIGENC_MBC_ENC_LEN(enc, p); \
-} while (0)
-#define PFETCH_S(c) do { \
- c = ONIGENC_MBC_TO_CODE(enc, p, end); \
- p += ONIGENC_MBC_ENC_LEN(enc, p); \
-} while (0)
-
-#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
-#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)
-
-static UChar*
-strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
- int capa, int oldCapa)
-{
- UChar* r;
-
- if (dest)
- r = (UChar* )xrealloc(dest, capa + 1, oldCapa);
- else
- r = (UChar* )xmalloc(capa + 1);
-
- CHECK_NULL_RETURN(r);
- onig_strcpy(r + (dest_end - dest), src, src_end);
- return r;
-}
-
-/* dest on static area */
-static UChar*
-strcat_capa_from_static(UChar* dest, UChar* dest_end,
- const UChar* src, const UChar* src_end, int capa)
-{
- UChar* r;
-
- r = (UChar* )xmalloc(capa + 1);
- CHECK_NULL_RETURN(r);
- onig_strcpy(r, dest, dest_end);
- onig_strcpy(r + (dest_end - dest), src, src_end);
- return r;
-}
-
-
-#ifdef USE_ST_LIBRARY
-
-typedef struct {
- UChar* s;
- UChar* end;
-} st_str_end_key;
-
-static int
-str_end_cmp(st_str_end_key* x, st_str_end_key* y)
-{
- UChar *p, *q;
- int c;
-
- if ((x->end - x->s) != (y->end - y->s))
- return 1;
-
- p = x->s;
- q = y->s;
- while (p < x->end) {
- c = (int )*p - (int )*q;
- if (c != 0) return c;
-
- p++; q++;
- }
-
- return 0;
-}
-
-static int
-str_end_hash(st_str_end_key* x)
-{
- UChar *p;
- int val = 0;
-
- p = x->s;
- while (p < x->end) {
- val = val * 997 + (int )*p++;
- }
-
- return val + (val >> 5);
-}
-
-extern hash_table_type*
-onig_st_init_strend_table_with_size(int size)
-{
- static struct st_hash_type hashType = {
- str_end_cmp,
- str_end_hash,
- };
-
- return (hash_table_type* )
- onig_st_init_table_with_size(&hashType, size);
-}
-
-extern int
-onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
- const UChar* end_key, hash_data_type *value)
-{
- st_str_end_key key;
-
- key.s = (UChar* )str_key;
- key.end = (UChar* )end_key;
-
- return onig_st_lookup(table, (st_data_t )(&key), value);
-}
-
-extern int
-onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
- const UChar* end_key, hash_data_type value)
-{
- st_str_end_key* key;
- int result;
-
- key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
- CHECK_NULL_RETURN_MEMERR(key);
-
- key->s = (UChar* )str_key;
- key->end = (UChar* )end_key;
- result = onig_st_insert(table, (st_data_t )key, value);
- if (result) {
- xfree(key);
- }
- return result;
-}
-
-
-#ifdef USE_CALLOUT
-
-typedef struct {
- OnigEncoding enc;
- int type; /* callout type: single or not */
- UChar* s;
- UChar* end;
-} st_callout_name_key;
-
-static int
-callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)
-{
- UChar *p, *q;
- int c;
-
- if (x->enc != y->enc) return 1;
- if (x->type != y->type) return 1;
- if ((x->end - x->s) != (y->end - y->s))
- return 1;
-
- p = x->s;
- q = y->s;
- while (p < x->end) {
- c = (int )*p - (int )*q;
- if (c != 0) return c;
-
- p++; q++;
- }
-
- return 0;
-}
-
-static int
-callout_name_table_hash(st_callout_name_key* x)
-{
- UChar *p;
- int val = 0;
-
- p = x->s;
- while (p < x->end) {
- val = val * 997 + (int )*p++;
- }
-
- /* use intptr_t for escape warning in Windows */
- return val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type;
-}
-
-extern hash_table_type*
-onig_st_init_callout_name_table_with_size(int size)
-{
- static struct st_hash_type hashType = {
- callout_name_table_cmp,
- callout_name_table_hash,
- };
-
- return (hash_table_type* )
- onig_st_init_table_with_size(&hashType, size);
-}
-
-extern int
-onig_st_lookup_callout_name_table(hash_table_type* table,
- OnigEncoding enc,
- int type,
- const UChar* str_key,
- const UChar* end_key,
- hash_data_type *value)
-{
- st_callout_name_key key;
-
- key.enc = enc;
- key.type = type;
- key.s = (UChar* )str_key;
- key.end = (UChar* )end_key;
-
- return onig_st_lookup(table, (st_data_t )(&key), value);
-}
-
-static int
-st_insert_callout_name_table(hash_table_type* table,
- OnigEncoding enc, int type,
- UChar* str_key, UChar* end_key,
- hash_data_type value)
-{
- st_callout_name_key* key;
- int result;
-
- key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));
- CHECK_NULL_RETURN_MEMERR(key);
-
- /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */
- key->enc = enc;
- key->type = type;
- key->s = str_key;
- key->end = end_key;
- result = onig_st_insert(table, (st_data_t )key, value);
- if (result) {
- xfree(key);
- }
- return result;
-}
-#endif
-
-#endif /* USE_ST_LIBRARY */
-
-
-#define INIT_NAME_BACKREFS_ALLOC_NUM 8
-
-typedef struct {
- UChar* name;
- int name_len; /* byte length */
- int back_num; /* number of backrefs */
- int back_alloc;
- int back_ref1;
- int* back_refs;
-} NameEntry;
-
-#ifdef USE_ST_LIBRARY
-
-#define INIT_NAMES_ALLOC_NUM 5
-
-typedef st_table NameTable;
-typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
-
-#define NAMEBUF_SIZE 24
-#define NAMEBUF_SIZE_1 25
-
-#ifdef ONIG_DEBUG
-static int
-i_print_name_entry(UChar* key, NameEntry* e, void* arg)
-{
- int i;
- FILE* fp = (FILE* )arg;
-
- fprintf(fp, "%s: ", e->name);
- if (e->back_num == 0)
- fputs("-", fp);
- else if (e->back_num == 1)
- fprintf(fp, "%d", e->back_ref1);
- else {
- for (i = 0; i < e->back_num; i++) {
- if (i > 0) fprintf(fp, ", ");
- fprintf(fp, "%d", e->back_refs[i]);
- }
- }
- fputs("\n", fp);
- return ST_CONTINUE;
-}
-
-extern int
-onig_print_names(FILE* fp, regex_t* reg)
-{
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t)) {
- fprintf(fp, "name table\n");
- onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
- fputs("\n", fp);
- }
- return 0;
-}
-#endif /* ONIG_DEBUG */
-
-static int
-i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
-{
- xfree(e->name);
- if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
- xfree(key);
- xfree(e);
- return ST_DELETE;
-}
-
-static int
-names_clear(regex_t* reg)
-{
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t)) {
- onig_st_foreach(t, i_free_name_entry, 0);
- }
- return 0;
-}
-
-extern int
-onig_names_free(regex_t* reg)
-{
- int r;
- NameTable* t;
-
- r = names_clear(reg);
- if (r != 0) return r;
-
- t = (NameTable* )reg->name_table;
- if (IS_NOT_NULL(t)) onig_st_free_table(t);
- reg->name_table = (void* )NULL;
- return 0;
-}
-
-static NameEntry*
-name_find(regex_t* reg, const UChar* name, const UChar* name_end)
-{
- NameEntry* e;
- NameTable* t = (NameTable* )reg->name_table;
-
- e = (NameEntry* )NULL;
- if (IS_NOT_NULL(t)) {
- onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
- }
- return e;
-}
-
-typedef struct {
- int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
- regex_t* reg;
- void* arg;
- int ret;
- OnigEncoding enc;
-} INamesArg;
-
-static int
-i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
-{
- int r = (*(arg->func))(e->name,
- e->name + e->name_len,
- e->back_num,
- (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
- arg->reg, arg->arg);
- if (r != 0) {
- arg->ret = r;
- return ST_STOP;
- }
- return ST_CONTINUE;
-}
-
-extern int
-onig_foreach_name(regex_t* reg,
- int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
-{
- INamesArg narg;
- NameTable* t = (NameTable* )reg->name_table;
-
- narg.ret = 0;
- if (IS_NOT_NULL(t)) {
- narg.func = func;
- narg.reg = reg;
- narg.arg = arg;
- narg.enc = reg->enc; /* should be pattern encoding. */
- onig_st_foreach(t, i_names, (HashDataType )&narg);
- }
- return narg.ret;
-}
-
-static int
-i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
-{
- int i;
-
- if (e->back_num > 1) {
- for (i = 0; i < e->back_num; i++) {
- e->back_refs[i] = map[e->back_refs[i]].new_val;
- }
- }
- else if (e->back_num == 1) {
- e->back_ref1 = map[e->back_ref1].new_val;
- }
-
- return ST_CONTINUE;
-}
-
-extern int
-onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
-{
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t)) {
- onig_st_foreach(t, i_renumber_name, (HashDataType )map);
- }
- return 0;
-}
-
-
-extern int
-onig_number_of_names(regex_t* reg)
-{
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t))
- return t->num_entries;
- else
- return 0;
-}
-
-#else /* USE_ST_LIBRARY */
-
-#define INIT_NAMES_ALLOC_NUM 8
-
-typedef struct {
- NameEntry* e;
- int num;
- int alloc;
-} NameTable;
-
-#ifdef ONIG_DEBUG
-extern int
-onig_print_names(FILE* fp, regex_t* reg)
-{
- int i, j;
- NameEntry* e;
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t) && t->num > 0) {
- fprintf(fp, "name table\n");
- for (i = 0; i < t->num; i++) {
- e = &(t->e[i]);
- fprintf(fp, "%s: ", e->name);
- if (e->back_num == 0) {
- fputs("-", fp);
- }
- else if (e->back_num == 1) {
- fprintf(fp, "%d", e->back_ref1);
- }
- else {
- for (j = 0; j < e->back_num; j++) {
- if (j > 0) fprintf(fp, ", ");
- fprintf(fp, "%d", e->back_refs[j]);
- }
- }
- fputs("\n", fp);
- }
- fputs("\n", fp);
- }
- return 0;
-}
-#endif
-
-static int
-names_clear(regex_t* reg)
-{
- int i;
- NameEntry* e;
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t)) {
- for (i = 0; i < t->num; i++) {
- e = &(t->e[i]);
- if (IS_NOT_NULL(e->name)) {
- xfree(e->name);
- e->name = NULL;
- e->name_len = 0;
- e->back_num = 0;
- e->back_alloc = 0;
- if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
- e->back_refs = (int* )NULL;
- }
- }
- if (IS_NOT_NULL(t->e)) {
- xfree(t->e);
- t->e = NULL;
- }
- t->num = 0;
- }
- return 0;
-}
-
-extern int
-onig_names_free(regex_t* reg)
-{
- int r;
- NameTable* t;
-
- r = names_clear(reg);
- if (r != 0) return r;
-
- t = (NameTable* )reg->name_table;
- if (IS_NOT_NULL(t)) xfree(t);
- reg->name_table = NULL;
- return 0;
-}
-
-static NameEntry*
-name_find(regex_t* reg, UChar* name, UChar* name_end)
-{
- int i, len;
- NameEntry* e;
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t)) {
- len = name_end - name;
- for (i = 0; i < t->num; i++) {
- e = &(t->e[i]);
- if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
- return e;
- }
- }
- return (NameEntry* )NULL;
-}
-
-extern int
-onig_foreach_name(regex_t* reg,
- int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
-{
- int i, r;
- NameEntry* e;
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t)) {
- for (i = 0; i < t->num; i++) {
- e = &(t->e[i]);
- r = (*func)(e->name, e->name + e->name_len, e->back_num,
- (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
- reg, arg);
- if (r != 0) return r;
- }
- }
- return 0;
-}
-
-extern int
-onig_number_of_names(regex_t* reg)
-{
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t))
- return t->num;
- else
- return 0;
-}
-
-#endif /* else USE_ST_LIBRARY */
-
-static int
-name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
-{
- int r;
- int alloc;
- NameEntry* e;
- NameTable* t = (NameTable* )reg->name_table;
-
- if (name_end - name <= 0)
- return ONIGERR_EMPTY_GROUP_NAME;
-
- e = name_find(reg, name, name_end);
- if (IS_NULL(e)) {
-#ifdef USE_ST_LIBRARY
- if (IS_NULL(t)) {
- t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);
- CHECK_NULL_RETURN_MEMERR(t);
- reg->name_table = (void* )t;
- }
- e = (NameEntry* )xmalloc(sizeof(NameEntry));
- CHECK_NULL_RETURN_MEMERR(e);
-
- e->name = onigenc_strdup(reg->enc, name, name_end);
- if (IS_NULL(e->name)) {
- xfree(e); return ONIGERR_MEMORY;
- }
- r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
- (HashDataType )e);
- if (r < 0) return r;
-
- e->name_len = (int )(name_end - name);
- e->back_num = 0;
- e->back_alloc = 0;
- e->back_refs = (int* )NULL;
-
-#else
-
- if (IS_NULL(t)) {
- alloc = INIT_NAMES_ALLOC_NUM;
- t = (NameTable* )xmalloc(sizeof(NameTable));
- CHECK_NULL_RETURN_MEMERR(t);
- t->e = NULL;
- t->alloc = 0;
- t->num = 0;
-
- t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
- if (IS_NULL(t->e)) {
- xfree(t);
- return ONIGERR_MEMORY;
- }
- t->alloc = alloc;
- reg->name_table = t;
- goto clear;
- }
- else if (t->num == t->alloc) {
- int i;
-
- alloc = t->alloc * 2;
- t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc, sizeof(NameEntry) * t->alloc);
- CHECK_NULL_RETURN_MEMERR(t->e);
- t->alloc = alloc;
-
- clear:
- for (i = t->num; i < t->alloc; i++) {
- t->e[i].name = NULL;
- t->e[i].name_len = 0;
- t->e[i].back_num = 0;
- t->e[i].back_alloc = 0;
- t->e[i].back_refs = (int* )NULL;
- }
- }
- e = &(t->e[t->num]);
- t->num++;
- e->name = onigenc_strdup(reg->enc, name, name_end);
- if (IS_NULL(e->name)) return ONIGERR_MEMORY;
- e->name_len = name_end - name;
-#endif
- }
-
- if (e->back_num >= 1 &&
- ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
- onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
- name, name_end);
- return ONIGERR_MULTIPLEX_DEFINED_NAME;
- }
-
- e->back_num++;
- if (e->back_num == 1) {
- e->back_ref1 = backref;
- }
- else {
- if (e->back_num == 2) {
- alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
- e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
- CHECK_NULL_RETURN_MEMERR(e->back_refs);
- e->back_alloc = alloc;
- e->back_refs[0] = e->back_ref1;
- e->back_refs[1] = backref;
- }
- else {
- if (e->back_num > e->back_alloc) {
- alloc = e->back_alloc * 2;
- e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc, sizeof(int) * e->back_alloc);
- CHECK_NULL_RETURN_MEMERR(e->back_refs);
- e->back_alloc = alloc;
- }
- e->back_refs[e->back_num - 1] = backref;
- }
- }
-
- return 0;
-}
-
-extern int
-onig_name_to_group_numbers(regex_t* reg, const UChar* name,
- const UChar* name_end, int** nums)
-{
- NameEntry* e = name_find(reg, name, name_end);
-
- if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
-
- switch (e->back_num) {
- case 0:
- break;
- case 1:
- *nums = &(e->back_ref1);
- break;
- default:
- *nums = e->back_refs;
- break;
- }
- return e->back_num;
-}
-
-static int
-name_to_group_numbers(ScanEnv* env, const UChar* name, const UChar* name_end,
- int** nums)
-{
- regex_t* reg;
- NameEntry* e;
-
- reg = env->reg;
- e = name_find(reg, name, name_end);
-
- if (IS_NULL(e)) {
- onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
- (UChar* )name, (UChar* )name_end);
- return ONIGERR_UNDEFINED_NAME_REFERENCE;
- }
-
- switch (e->back_num) {
- case 0:
- break;
- case 1:
- *nums = &(e->back_ref1);
- break;
- default:
- *nums = e->back_refs;
- break;
- }
- return e->back_num;
-}
-
-extern int
-onig_name_to_backref_number(regex_t* reg, const UChar* name,
- const UChar* name_end, OnigRegion *region)
-{
- int i, n, *nums;
-
- n = onig_name_to_group_numbers(reg, name, name_end, &nums);
- if (n < 0)
- return n;
- else if (n == 0)
- return ONIGERR_PARSER_BUG;
- else if (n == 1)
- return nums[0];
- else {
- if (IS_NOT_NULL(region)) {
- for (i = n - 1; i >= 0; i--) {
- if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
- return nums[i];
- }
- }
- return nums[n - 1];
- }
-}
-
-extern int
-onig_noname_group_capture_is_active(regex_t* reg)
-{
- if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
- return 0;
-
- if (onig_number_of_names(reg) > 0 &&
- IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
- !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
- return 0;
- }
-
- return 1;
-}
-
-#ifdef USE_CALLOUT
-
-typedef struct {
- OnigCalloutType type;
- int in;
- OnigCalloutFunc start_func;
- OnigCalloutFunc end_func;
- int arg_num;
- int opt_arg_num;
- unsigned int arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];
- OnigValue opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];
- UChar* name; /* reference to GlobalCalloutNameTable entry: e->name */
-} CalloutNameListEntry;
-
-typedef struct {
- int n;
- int alloc;
- CalloutNameListEntry* v;
-} CalloutNameListType;
-
-static CalloutNameListType* GlobalCalloutNameList;
-
-static int
-make_callout_func_list(CalloutNameListType** rs, int init_size)
-{
- CalloutNameListType* s;
- CalloutNameListEntry* v;
-
- *rs = 0;
-
- s = xmalloc(sizeof(*s));
- if (IS_NULL(s)) return ONIGERR_MEMORY;
-
- v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);
- if (IS_NULL(v)) {
- xfree(s);
- return ONIGERR_MEMORY;
- }
-
- s->n = 0;
- s->alloc = init_size;
- s->v = v;
-
- *rs = s;
- return ONIG_NORMAL;
-}
-
-static void
-free_callout_func_list(CalloutNameListType* s)
-{
- if (IS_NOT_NULL(s)) {
- if (IS_NOT_NULL(s->v)) {
- int i, j;
-
- for (i = 0; i < s->n; i++) {
- CalloutNameListEntry* e = s->v + i;
- for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {
- if (e->arg_types[j] == ONIG_TYPE_STRING) {
- UChar* p = e->opt_defaults[j].s.start;
- if (IS_NOT_NULL(p)) xfree(p);
- }
- }
- }
- xfree(s->v);
- }
- xfree(s);
- }
-}
-
-static int
-callout_func_list_add(CalloutNameListType* s, int* rid)
-{
- if (s->n >= s->alloc) {
- int new_size = s->alloc * 2;
- CalloutNameListEntry* nv = (CalloutNameListEntry* )
- xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size, sizeof(CalloutNameListEntry)*s->alloc);
- if (IS_NULL(nv)) return ONIGERR_MEMORY;
-
- s->alloc = new_size;
- s->v = nv;
- }
-
- *rid = s->n;
-
- xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));
- s->n++;
- return ONIG_NORMAL;
-}
-
-
-typedef struct {
- UChar* name;
- int name_len; /* byte length */
- int id;
-} CalloutNameEntry;
-
-#ifdef USE_ST_LIBRARY
-typedef st_table CalloutNameTable;
-#else
-typedef struct {
- CalloutNameEntry* e;
- int num;
- int alloc;
-} CalloutNameTable;
-#endif
-
-static CalloutNameTable* GlobalCalloutNameTable;
-static int CalloutNameIDCounter;
-
-#ifdef USE_ST_LIBRARY
-
-static int
-i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,
- void* arg ARG_UNUSED)
-{
- xfree(e->name);
- /*xfree(key->s); */ /* is same as e->name */
- xfree(key);
- xfree(e);
- return ST_DELETE;
-}
-
-static int
-callout_name_table_clear(CalloutNameTable* t)
-{
- if (IS_NOT_NULL(t)) {
- onig_st_foreach(t, i_free_callout_name_entry, 0);
- }
- return 0;
-}
-
-static int
-global_callout_name_table_free(void)
-{
- if (IS_NOT_NULL(GlobalCalloutNameTable)) {
- int r = callout_name_table_clear(GlobalCalloutNameTable);
- if (r != 0) return r;
-
- onig_st_free_table(GlobalCalloutNameTable);
- GlobalCalloutNameTable = 0;
- CalloutNameIDCounter = 0;
- }
-
- return 0;
-}
-
-static CalloutNameEntry*
-callout_name_find(OnigEncoding enc, int is_not_single,
- const UChar* name, const UChar* name_end)
-{
- int r;
- CalloutNameEntry* e;
- CalloutNameTable* t = GlobalCalloutNameTable;
-
- e = (CalloutNameEntry* )NULL;
- if (IS_NOT_NULL(t)) {
- r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
- (HashDataType* )((void* )(&e)));
- if (r == 0) { /* not found */
- if (enc != ONIG_ENCODING_ASCII &&
- ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
- enc = ONIG_ENCODING_ASCII;
- onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
- (HashDataType* )((void* )(&e)));
- }
- }
- }
- return e;
-}
-
-#else
-
-static int
-callout_name_table_clear(CalloutNameTable* t)
-{
- int i;
- CalloutNameEntry* e;
-
- if (IS_NOT_NULL(t)) {
- for (i = 0; i < t->num; i++) {
- e = &(t->e[i]);
- if (IS_NOT_NULL(e->name)) {
- xfree(e->name);
- e->name = NULL;
- e->name_len = 0;
- e->id = 0;
- e->func = 0;
- }
- }
- if (IS_NOT_NULL(t->e)) {
- xfree(t->e);
- t->e = NULL;
- }
- t->num = 0;
- }
- return 0;
-}
-
-static int
-global_callout_name_table_free(void)
-{
- if (IS_NOT_NULL(GlobalCalloutNameTable)) {
- int r = callout_name_table_clear(GlobalCalloutNameTable);
- if (r != 0) return r;
-
- xfree(GlobalCalloutNameTable);
- GlobalCalloutNameTable = 0;
- CalloutNameIDCounter = 0;
- }
- return 0;
-}
-
-static CalloutNameEntry*
-callout_name_find(UChar* name, UChar* name_end)
-{
- int i, len;
- CalloutNameEntry* e;
- CalloutNameTable* t = Calloutnames;
-
- if (IS_NOT_NULL(t)) {
- len = name_end - name;
- for (i = 0; i < t->num; i++) {
- e = &(t->e[i]);
- if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
- return e;
- }
- }
- return (CalloutNameEntry* )NULL;
-}
-
-#endif
-
-/* name string must be single byte char string. */
-static int
-callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,
- int is_not_single, UChar* name, UChar* name_end)
-{
- int r;
- CalloutNameEntry* e;
- CalloutNameTable* t = GlobalCalloutNameTable;
-
- *rentry = 0;
- if (name_end - name <= 0)
- return ONIGERR_INVALID_CALLOUT_NAME;
-
- e = callout_name_find(enc, is_not_single, name, name_end);
- if (IS_NULL(e)) {
-#ifdef USE_ST_LIBRARY
- if (IS_NULL(t)) {
- t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);
- CHECK_NULL_RETURN_MEMERR(t);
- GlobalCalloutNameTable = t;
- }
- e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));
- CHECK_NULL_RETURN_MEMERR(e);
-
- e->name = onigenc_strdup(enc, name, name_end);
- if (IS_NULL(e->name)) {
- xfree(e); return ONIGERR_MEMORY;
- }
-
- r = st_insert_callout_name_table(t, enc, is_not_single,
- e->name, (e->name + (name_end - name)),
- (HashDataType )e);
- if (r < 0) return r;
-
-#else
-
- int alloc;
-
- if (IS_NULL(t)) {
- alloc = INIT_NAMES_ALLOC_NUM;
- t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));
- CHECK_NULL_RETURN_MEMERR(t);
- t->e = NULL;
- t->alloc = 0;
- t->num = 0;
-
- t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);
- if (IS_NULL(t->e)) {
- xfree(t);
- return ONIGERR_MEMORY;
- }
- t->alloc = alloc;
- GlobalCalloutNameTable = t;
- goto clear;
- }
- else if (t->num == t->alloc) {
- int i;
-
- alloc = t->alloc * 2;
- t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc, sizeof(CalloutNameEntry)*t->alloc);
- CHECK_NULL_RETURN_MEMERR(t->e);
- t->alloc = alloc;
-
- clear:
- for (i = t->num; i < t->alloc; i++) {
- t->e[i].name = NULL;
- t->e[i].name_len = 0;
- t->e[i].id = 0;
- }
- }
- e = &(t->e[t->num]);
- t->num++;
- e->name = onigenc_strdup(enc, name, name_end);
- if (IS_NULL(e->name)) return ONIGERR_MEMORY;
-#endif
-
- CalloutNameIDCounter++;
- e->id = CalloutNameIDCounter;
- e->name_len = (int )(name_end - name);
- }
-
- *rentry = e;
- return e->id;
-}
-
-static int
-is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)
-{
- UChar* p;
- OnigCodePoint c;
-
- if (name >= name_end) return 0;
-
- p = name;
- while (p < name_end) {
- c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
- if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))
- return 0;
-
- if (p == name) {
- if (c >= '0' && c <= '9') return 0;
- }
-
- p += ONIGENC_MBC_ENC_LEN(enc, p);
- }
-
- return 1;
-}
-
-static int
-is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)
-{
- UChar* p;
- OnigCodePoint c;
-
- if (name >= name_end) return 0;
-
- p = name;
- while (p < name_end) {
- c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
- if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))
- return 0;
-
- if (p == name) {
- if (c >= '0' && c <= '9') return 0;
- }
-
- p += ONIGENC_MBC_ENC_LEN(enc, p);
- }
-
- return 1;
-}
-
-extern int
-onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
- UChar* name, UChar* name_end, int in,
- OnigCalloutFunc start_func,
- OnigCalloutFunc end_func,
- int arg_num, unsigned int arg_types[],
- int opt_arg_num, OnigValue opt_defaults[])
-{
- int r;
- int i;
- int j;
- int id;
- int is_not_single;
- CalloutNameEntry* e;
- CalloutNameListEntry* fe;
-
- if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)
- return ONIGERR_INVALID_ARGUMENT;
-
- if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)
- return ONIGERR_INVALID_CALLOUT_ARG;
-
- if (opt_arg_num < 0 || opt_arg_num > arg_num)
- return ONIGERR_INVALID_CALLOUT_ARG;
-
- if (start_func == 0 && end_func == 0)
- return ONIGERR_INVALID_CALLOUT_ARG;
-
- if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)
- return ONIGERR_INVALID_CALLOUT_ARG;
-
- for (i = 0; i < arg_num; i++) {
- unsigned int t = arg_types[i];
- if (t == ONIG_TYPE_VOID)
- return ONIGERR_INVALID_CALLOUT_ARG;
- else {
- if (i >= arg_num - opt_arg_num) {
- if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&
- t != ONIG_TYPE_TAG)
- return ONIGERR_INVALID_CALLOUT_ARG;
- }
- else {
- if (t != ONIG_TYPE_LONG) {
- t = t & ~ONIG_TYPE_LONG;
- if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)
- return ONIGERR_INVALID_CALLOUT_ARG;
- }
- }
- }
- }
-
- if (! is_allowed_callout_name(enc, name, name_end)) {
- return ONIGERR_INVALID_CALLOUT_NAME;
- }
-
- is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);
- id = callout_name_entry(&e, enc, is_not_single, name, name_end);
- if (id < 0) return id;
-
- r = ONIG_NORMAL;
- if (IS_NULL(GlobalCalloutNameList)) {
- r = make_callout_func_list(&GlobalCalloutNameList, 10);
- if (r != ONIG_NORMAL) return r;
- }
-
- while (id >= GlobalCalloutNameList->n) {
- int rid;
- r = callout_func_list_add(GlobalCalloutNameList, &rid);
- if (r != ONIG_NORMAL) return r;
- }
-
- fe = GlobalCalloutNameList->v + id;
- fe->type = callout_type;
- fe->in = in;
- fe->start_func = start_func;
- fe->end_func = end_func;
- fe->arg_num = arg_num;
- fe->opt_arg_num = opt_arg_num;
- fe->name = e->name;
-
- for (i = 0; i < arg_num; i++) {
- fe->arg_types[i] = arg_types[i];
- }
- for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {
- if(IS_NULL(opt_defaults))return ONIGERR_INVALID_ARGUMENT;
- if (fe->arg_types[i] == ONIG_TYPE_STRING) {
- OnigValue* val;
- UChar* ds;
-
- if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT;
-
- val = opt_defaults + j;
- ds = onigenc_strdup(enc, val->s.start, val->s.end);
- CHECK_NULL_RETURN_MEMERR(ds);
-
- fe->opt_defaults[i].s.start = ds;
- fe->opt_defaults[i].s.end = ds + (val->s.end - val->s.start);
- }
- else {
- fe->opt_defaults[i] = opt_defaults[j];
- }
- }
-
- r = id;
- return r;
-}
-
-static int
-get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,
- UChar* name, UChar* name_end, int* rid)
-{
- int r;
- CalloutNameEntry* e;
-
- if (! is_allowed_callout_name(enc, name, name_end)) {
- return ONIGERR_INVALID_CALLOUT_NAME;
- }
-
- e = callout_name_find(enc, is_not_single, name, name_end);
- if (IS_NULL(e)) {
- return ONIGERR_UNDEFINED_CALLOUT_NAME;
- }
-
- r = ONIG_NORMAL;
- *rid = e->id;
-
- return r;
-}
-
-extern OnigCalloutFunc
-onig_get_callout_start_func(regex_t* reg, int callout_num)
-{
- /* If used for callouts of contents, return 0. */
- CalloutListEntry* e;
-
- e = onig_reg_callout_list_at(reg, callout_num);
- CHECK_NULL_RETURN(e);
- return e->start_func;
-}
-
-extern const UChar*
-onig_get_callout_tag_start(regex_t* reg, int callout_num)
-{
- CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
- CHECK_NULL_RETURN(e);
- return e->tag_start;
-}
-
-extern const UChar*
-onig_get_callout_tag_end(regex_t* reg, int callout_num)
-{
- CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
- CHECK_NULL_RETURN(e);
- return e->tag_end;
-}
-
-
-extern OnigCalloutType
-onig_get_callout_type_by_name_id(int name_id)
-{
- if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
- return 0;
-
- return GlobalCalloutNameList->v[name_id].type;
-}
-
-extern OnigCalloutFunc
-onig_get_callout_start_func_by_name_id(int name_id)
-{
- if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
- return 0;
-
- return GlobalCalloutNameList->v[name_id].start_func;
-}
-
-extern OnigCalloutFunc
-onig_get_callout_end_func_by_name_id(int name_id)
-{
- if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
- return 0;
-
- return GlobalCalloutNameList->v[name_id].end_func;
-}
-
-extern int
-onig_get_callout_in_by_name_id(int name_id)
-{
- if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
- return 0;
-
- return GlobalCalloutNameList->v[name_id].in;
-}
-
-static int
-get_callout_arg_num_by_name_id(int name_id)
-{
- return GlobalCalloutNameList->v[name_id].arg_num;
-}
-
-static int
-get_callout_opt_arg_num_by_name_id(int name_id)
-{
- return GlobalCalloutNameList->v[name_id].opt_arg_num;
-}
-
-static unsigned int
-get_callout_arg_type_by_name_id(int name_id, int index)
-{
- return GlobalCalloutNameList->v[name_id].arg_types[index];
-}
-
-static OnigValue
-get_callout_opt_default_by_name_id(int name_id, int index)
-{
- return GlobalCalloutNameList->v[name_id].opt_defaults[index];
-}
-
-extern UChar*
-onig_get_callout_name_by_name_id(int name_id)
-{
- if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
- return 0;
-
- return GlobalCalloutNameList->v[name_id].name;
-}
-
-extern int
-onig_global_callout_names_free(void)
-{
- free_callout_func_list(GlobalCalloutNameList);
- GlobalCalloutNameList = 0;
-
- global_callout_name_table_free();
- return ONIG_NORMAL;
-}
-
-
-typedef st_table CalloutTagTable;
-typedef intptr_t CalloutTagVal;
-
-#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST (1<<0)
-
-static int
-i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)
-{
- int num;
- RegexExt* ext = (RegexExt* )arg;
-
- num = (int )e - 1;
- ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;
- return ST_CONTINUE;
-}
-
-static int
-setup_ext_callout_list_values(regex_t* reg)
-{
- int i, j;
- RegexExt* ext;
-
- ext = reg->extp;
- if (IS_NOT_NULL(ext->tag_table)) {
- onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,
- (st_data_t )ext);
- }
-
- for (i = 0; i < ext->callout_num; i++) {
- CalloutListEntry* e = ext->callout_list + i;
- if (e->of == ONIG_CALLOUT_OF_NAME) {
- for (j = 0; j < e->u.arg.num; j++) {
- if (e->u.arg.types[j] == ONIG_TYPE_TAG) {
- UChar* start;
- UChar* end;
- int num;
- start = e->u.arg.vals[j].s.start;
- end = e->u.arg.vals[j].s.end;
- num = onig_get_callout_num_by_tag(reg, start, end);
- if (num < 0) return num;
- e->u.arg.vals[j].tag = num;
- }
- }
- }
- }
-
- return ONIG_NORMAL;
-}
-
-extern int
-onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)
-{
- RegexExt* ext = reg->extp;
-
- if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;
- if (callout_num > ext->callout_num) return 0;
-
- return (ext->callout_list[callout_num].flag &
- CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0;
-}
-
-static int
-i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)
-{
- xfree(key);
- return ST_DELETE;
-}
-
-static int
-callout_tag_table_clear(CalloutTagTable* t)
-{
- if (IS_NOT_NULL(t)) {
- onig_st_foreach(t, i_free_callout_tag_entry, 0);
- }
- return 0;
-}
-
-extern int
-onig_callout_tag_table_free(void* table)
-{
- CalloutTagTable* t = (CalloutTagTable* )table;
-
- if (IS_NOT_NULL(t)) {
- int r = callout_tag_table_clear(t);
- if (r != 0) return r;
-
- onig_st_free_table(t);
- }
-
- return 0;
-}
-
-extern int
-onig_get_callout_num_by_tag(regex_t* reg,
- const UChar* tag, const UChar* tag_end)
-{
- int r;
- RegexExt* ext;
- CalloutTagVal e;
-
- ext = reg->extp;
- if (IS_NULL(ext) || IS_NULL(ext->tag_table))
- return ONIGERR_INVALID_CALLOUT_TAG_NAME;
-
- r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,
- (HashDataType* )((void* )(&e)));
- if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
- return (int )e;
-}
-
-static CalloutTagVal
-callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)
-{
- CalloutTagVal e;
-
- e = -1;
- if (IS_NOT_NULL(t)) {
- onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
- }
- return e;
-}
-
-static int
-callout_tag_table_new(CalloutTagTable** rt)
-{
- CalloutTagTable* t;
-
- *rt = 0;
- t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);
- CHECK_NULL_RETURN_MEMERR(t);
-
- *rt = t;
- return ONIG_NORMAL;
-}
-
-static int
-callout_tag_entry_raw(ScanEnv* env, CalloutTagTable* t, UChar* name,
- UChar* name_end, CalloutTagVal entry_val)
-{
- int r;
- CalloutTagVal val;
-
- if (name_end - name <= 0)
- return ONIGERR_INVALID_CALLOUT_TAG_NAME;
-
- val = callout_tag_find(t, name, name_end);
- if (val >= 0) {
- onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
- name, name_end);
- return ONIGERR_MULTIPLEX_DEFINED_NAME;
- }
-
- r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);
- if (r < 0) return r;
-
- return ONIG_NORMAL;
-}
-
-static int
-ext_ensure_tag_table(regex_t* reg)
-{
- int r;
- RegexExt* ext;
- CalloutTagTable* t;
-
- ext = onig_get_regex_ext(reg);
- CHECK_NULL_RETURN_MEMERR(ext);
-
- if (IS_NULL(ext->tag_table)) {
- r = callout_tag_table_new(&t);
- if (r != ONIG_NORMAL) return r;
-
- ext->tag_table = t;
- }
-
- return ONIG_NORMAL;
-}
-
-static int
-callout_tag_entry(ScanEnv* env, regex_t* reg, UChar* name, UChar* name_end,
- CalloutTagVal entry_val)
-{
- int r;
- RegexExt* ext;
- CalloutListEntry* e;
-
- r = ext_ensure_tag_table(reg);
- if (r != ONIG_NORMAL) return r;
-
- ext = onig_get_regex_ext(reg);
- CHECK_NULL_RETURN_MEMERR(ext);
- CHECK_NULL_RETURN_MEMERR(ext->tag_table);
- r = callout_tag_entry_raw(env, ext->tag_table, name, name_end, entry_val);
-
- e = onig_reg_callout_list_at(reg, (int )entry_val);
- CHECK_NULL_RETURN_MEMERR(e);
- e->tag_start = name;
- e->tag_end = name_end;
-
- return r;
-}
-
-#endif /* USE_CALLOUT */
-
-
-#define INIT_SCANENV_MEMENV_ALLOC_SIZE 16
-
-static void
-scan_env_clear(ScanEnv* env)
-{
- MEM_STATUS_CLEAR(env->capture_history);
- MEM_STATUS_CLEAR(env->bt_mem_start);
- MEM_STATUS_CLEAR(env->bt_mem_end);
- MEM_STATUS_CLEAR(env->backrefed_mem);
- env->error = (UChar* )NULL;
- env->error_end = (UChar* )NULL;
- env->num_call = 0;
-
-#ifdef USE_CALL
- env->unset_addr_list = NULL;
- env->has_call_zero = 0;
-#endif
-
- env->num_mem = 0;
- env->num_named = 0;
- env->mem_alloc = 0;
- env->mem_env_dynamic = (MemEnv* )NULL;
-
- xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));
-
- env->parse_depth = 0;
- env->keep_num = 0;
- env->save_num = 0;
- env->save_alloc_num = 0;
- env->saves = 0;
-}
-
-static int
-scan_env_add_mem_entry(ScanEnv* env)
-{
- int i, need, alloc;
- MemEnv* p;
-
- need = env->num_mem + 1;
- if (need > MaxCaptureNum && MaxCaptureNum != 0)
- return ONIGERR_TOO_MANY_CAPTURES;
-
- if (need >= SCANENV_MEMENV_SIZE) {
- if (env->mem_alloc <= need) {
- if (IS_NULL(env->mem_env_dynamic)) {
- alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE;
- p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);
- CHECK_NULL_RETURN_MEMERR(p);
- xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));
- }
- else {
- alloc = env->mem_alloc * 2;
- p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc, sizeof(MemEnv)*env->mem_alloc);
- CHECK_NULL_RETURN_MEMERR(p);
- }
-
- for (i = env->num_mem + 1; i < alloc; i++) {
- p[i].node = NULL_NODE;
-#if 0
- p[i].in = 0;
- p[i].recursion = 0;
-#endif
- }
-
- env->mem_env_dynamic = p;
- env->mem_alloc = alloc;
- }
- }
-
- env->num_mem++;
- return env->num_mem;
-}
-
-static int
-scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
-{
- if (env->num_mem >= num)
- SCANENV_MEMENV(env)[num].node = node;
- else
- return ONIGERR_PARSER_BUG;
- return 0;
-}
-
-extern void
-onig_node_free(Node* node)
-{
- start:
- if (IS_NULL(node)) return ;
-
-#ifdef DEBUG_NODE_FREE
- fprintf(stderr, "onig_node_free: %p\n", node);
-#endif
-
- switch (NODE_TYPE(node)) {
- case NODE_STRING:
- if (STR_(node)->capacity != 0 &&
- IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
- xfree(STR_(node)->s);
- }
- break;
-
- case NODE_LIST:
- case NODE_ALT:
- onig_node_free(NODE_CAR(node));
- {
- Node* next_node = NODE_CDR(node);
-
- xfree(node);
- node = next_node;
- goto start;
- }
- break;
-
- case NODE_CCLASS:
- {
- CClassNode* cc = CCLASS_(node);
-
- if (cc->mbuf)
- bbuf_free(cc->mbuf);
- }
- break;
-
- case NODE_BACKREF:
- if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))
- xfree(BACKREF_(node)->back_dynamic);
- break;
-
- case NODE_BAG:
- if (NODE_BODY(node))
- onig_node_free(NODE_BODY(node));
-
- {
- BagNode* en = BAG_(node);
- if (en->type == BAG_IF_ELSE) {
- onig_node_free(en->te.Then);
- onig_node_free(en->te.Else);
- }
- }
- break;
-
- case NODE_QUANT:
- case NODE_ANCHOR:
- if (NODE_BODY(node))
- onig_node_free(NODE_BODY(node));
- break;
-
- case NODE_CTYPE:
- case NODE_CALL:
- case NODE_GIMMICK:
- break;
- }
-
- xfree(node);
-}
-
-static void
-cons_node_free_alone(Node* node)
-{
- NODE_CAR(node) = 0;
- NODE_CDR(node) = 0;
- onig_node_free(node);
-}
-
-static Node*
-node_new(void)
-{
- Node* node;
-
- node = (Node* )xmalloc(sizeof(Node));
- CHECK_NULL_RETURN(node);
- xmemset(node, 0, sizeof(*node));
-
-#ifdef DEBUG_NODE_FREE
- fprintf(stderr, "node_new: %p\n", node);
-#endif
- return node;
-}
-
-
-static void
-initialize_cclass(CClassNode* cc)
-{
- BITSET_CLEAR(cc->bs);
- cc->flags = 0;
- cc->mbuf = NULL;
-}
-
-static Node*
-node_new_cclass(void)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- NODE_SET_TYPE(node, NODE_CCLASS);
- initialize_cclass(CCLASS_(node));
- return node;
-}
-
-static Node*
-node_new_ctype(int type, int not, OnigOptionType options)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- NODE_SET_TYPE(node, NODE_CTYPE);
- CTYPE_(node)->ctype = type;
- CTYPE_(node)->not = not;
- CTYPE_(node)->options = options;
- CTYPE_(node)->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options);
- return node;
-}
-
-static Node*
-node_new_anychar(void)
-{
- Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE);
- return node;
-}
-
-static Node*
-node_new_anychar_with_fixed_option(OnigOptionType option)
-{
- CtypeNode* ct;
- Node* node;
-
- node = node_new_anychar();
- CHECK_NULL_RETURN(node);
-
- ct = CTYPE_(node);
- ct->options = option;
- NODE_STATUS_ADD(node, FIXED_OPTION);
- return node;
-}
-
-static int
-node_new_no_newline(Node** node, ScanEnv* env)
-{
- Node* n;
-
- n = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE);
- CHECK_NULL_RETURN_MEMERR(n);
- *node = n;
- return 0;
-}
-
-static int
-node_new_true_anychar(Node** node, ScanEnv* env)
-{
- Node* n;
-
- n = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE);
- CHECK_NULL_RETURN_MEMERR(n);
- *node = n;
- return 0;
-}
-
-static Node*
-node_new_list(Node* left, Node* right)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- NODE_SET_TYPE(node, NODE_LIST);
- NODE_CAR(node) = left;
- NODE_CDR(node) = right;
- return node;
-}
-
-extern Node*
-onig_node_new_list(Node* left, Node* right)
-{
- return node_new_list(left, right);
-}
-
-extern Node*
-onig_node_list_add(Node* list, Node* x)
-{
- Node *n;
-
- n = onig_node_new_list(x, NULL);
- if (IS_NULL(n)) return NULL_NODE;
-
- if (IS_NOT_NULL(list)) {
- while (IS_NOT_NULL(NODE_CDR(list)))
- list = NODE_CDR(list);
-
- NODE_CDR(list) = n;
- }
-
- return n;
-}
-
-extern Node*
-onig_node_new_alt(Node* left, Node* right)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- NODE_SET_TYPE(node, NODE_ALT);
- NODE_CAR(node) = left;
- NODE_CDR(node) = right;
- return node;
-}
-
-static Node*
-make_list_or_alt(NodeType type, int n, Node* ns[])
-{
- Node* r;
-
- if (n <= 0) return NULL_NODE;
-
- if (n == 1) {
- r = node_new();
- CHECK_NULL_RETURN(r);
- NODE_SET_TYPE(r, type);
- NODE_CAR(r) = ns[0];
- NODE_CDR(r) = NULL_NODE;
- }
- else {
- Node* right;
-
- r = node_new();
- CHECK_NULL_RETURN(r);
-
- right = make_list_or_alt(type, n - 1, ns + 1);
- if (IS_NULL(right)) {
- onig_node_free(r);
- return NULL_NODE;
- }
-
- NODE_SET_TYPE(r, type);
- NODE_CAR(r) = ns[0];
- NODE_CDR(r) = right;
- }
-
- return r;
-}
-
-static Node*
-make_list(int n, Node* ns[])
-{
- return make_list_or_alt(NODE_LIST, n, ns);
-}
-
-static Node*
-make_alt(int n, Node* ns[])
-{
- return make_list_or_alt(NODE_ALT, n, ns);
-}
-
-extern Node*
-onig_node_new_anchor(int type, int ascii_mode)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- NODE_SET_TYPE(node, NODE_ANCHOR);
- ANCHOR_(node)->type = type;
- ANCHOR_(node)->char_len = -1;
- ANCHOR_(node)->ascii_mode = ascii_mode;
- return node;
-}
-
-static Node*
-node_new_backref(int back_num, int* backrefs, int by_name,
-#ifdef USE_BACKREF_WITH_LEVEL
- int exist_level, int nest_level,
-#endif
- ScanEnv* env)
-{
- int i;
- Node* node = node_new();
-
- CHECK_NULL_RETURN(node);
-
- NODE_SET_TYPE(node, NODE_BACKREF);
- BACKREF_(node)->back_num = back_num;
- BACKREF_(node)->back_dynamic = (int* )NULL;
- if (by_name != 0)
- NODE_STATUS_ADD(node, BY_NAME);
-
-#ifdef USE_BACKREF_WITH_LEVEL
- if (exist_level != 0) {
- NODE_STATUS_ADD(node, NEST_LEVEL);
- BACKREF_(node)->nest_level = nest_level;
- }
-#endif
-
- for (i = 0; i < back_num; i++) {
- if (backrefs[i] <= env->num_mem &&
- IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) {
- NODE_STATUS_ADD(node, RECURSION); /* /...(\1).../ */
- break;
- }
- }
-
- if (back_num <= NODE_BACKREFS_SIZE) {
- for (i = 0; i < back_num; i++)
- BACKREF_(node)->back_static[i] = backrefs[i];
- }
- else {
- int* p = (int* )xmalloc(sizeof(int) * back_num);
- if (IS_NULL(p)) {
- onig_node_free(node);
- return NULL;
- }
- BACKREF_(node)->back_dynamic = p;
- for (i = 0; i < back_num; i++)
- p[i] = backrefs[i];
- }
- return node;
-}
-
-static Node*
-node_new_backref_checker(int back_num, int* backrefs, int by_name,
-#ifdef USE_BACKREF_WITH_LEVEL
- int exist_level, int nest_level,
-#endif
- ScanEnv* env)
-{
- Node* node;
-
- node = node_new_backref(back_num, backrefs, by_name,
-#ifdef USE_BACKREF_WITH_LEVEL
- exist_level, nest_level,
-#endif
- env);
- CHECK_NULL_RETURN(node);
-
- NODE_STATUS_ADD(node, CHECKER);
- return node;
-}
-
-#ifdef USE_CALL
-static Node*
-node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- NODE_SET_TYPE(node, NODE_CALL);
- CALL_(node)->by_number = by_number;
- CALL_(node)->name = name;
- CALL_(node)->name_end = name_end;
- CALL_(node)->group_num = gnum;
- CALL_(node)->entry_count = 1;
- return node;
-}
-#endif
-
-static Node*
-node_new_quantifier(int lower, int upper, int by_number)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- NODE_SET_TYPE(node, NODE_QUANT);
- QUANT_(node)->lower = lower;
- QUANT_(node)->upper = upper;
- QUANT_(node)->greedy = 1;
- QUANT_(node)->emptiness = BODY_IS_NOT_EMPTY;
- QUANT_(node)->head_exact = NULL_NODE;
- QUANT_(node)->next_head_exact = NULL_NODE;
- QUANT_(node)->is_refered = 0;
- if (by_number != 0)
- NODE_STATUS_ADD(node, BY_NUMBER);
-
- return node;
-}
-
-static Node*
-node_new_bag(enum BagType type)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- NODE_SET_TYPE(node, NODE_BAG);
- BAG_(node)->type = type;
-
- switch (type) {
- case BAG_MEMORY:
- BAG_(node)->m.regnum = 0;
- BAG_(node)->m.called_addr = -1;
- BAG_(node)->m.entry_count = 1;
- BAG_(node)->m.called_state = 0;
- break;
-
- case BAG_OPTION:
- BAG_(node)->o.options = 0;
- break;
-
- case BAG_STOP_BACKTRACK:
- break;
-
- case BAG_IF_ELSE:
- BAG_(node)->te.Then = 0;
- BAG_(node)->te.Else = 0;
- break;
- }
-
- BAG_(node)->opt_count = 0;
- return node;
-}
-
-extern Node*
-onig_node_new_bag(enum BagType type)
-{
- return node_new_bag(type);
-}
-
-static Node*
-node_new_bag_if_else(Node* cond, Node* Then, Node* Else)
-{
- Node* n;
- n = node_new_bag(BAG_IF_ELSE);
- CHECK_NULL_RETURN(n);
-
- NODE_BODY(n) = cond;
- BAG_(n)->te.Then = Then;
- BAG_(n)->te.Else = Else;
- return n;
-}
-
-static Node*
-node_new_memory(int is_named)
-{
- Node* node = node_new_bag(BAG_MEMORY);
- CHECK_NULL_RETURN(node);
- if (is_named != 0)
- NODE_STATUS_ADD(node, NAMED_GROUP);
-
- return node;
-}
-
-static Node*
-node_new_option(OnigOptionType option)
-{
- Node* node = node_new_bag(BAG_OPTION);
- CHECK_NULL_RETURN(node);
- BAG_(node)->o.options = option;
- return node;
-}
-
-static Node*
-node_new_group(Node* content)
-{
- Node* node;
-
- node = node_new();
- CHECK_NULL_RETURN(node);
- NODE_SET_TYPE(node, NODE_LIST);
- NODE_CAR(node) = content;
- NODE_CDR(node) = NULL_NODE;
-
- return node;
-}
-
-static Node*
-node_drop_group(Node* group)
-{
- Node* content;
-
- content = NODE_CAR(group);
- NODE_CAR(group) = NULL_NODE;
- onig_node_free(group);
- return content;
-}
-
-static int
-node_new_fail(Node** node, ScanEnv* env)
-{
- *node = node_new();
- CHECK_NULL_RETURN_MEMERR(*node);
-
- NODE_SET_TYPE(*node, NODE_GIMMICK);
- GIMMICK_(*node)->type = GIMMICK_FAIL;
- return ONIG_NORMAL;
-}
-
-static int
-node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env)
-{
- int id;
- int r;
-
- r = save_entry(env, save_type, &id);
- if (r != ONIG_NORMAL) return r;
-
- *node = node_new();
- CHECK_NULL_RETURN_MEMERR(*node);
-
- NODE_SET_TYPE(*node, NODE_GIMMICK);
- GIMMICK_(*node)->id = id;
- GIMMICK_(*node)->type = GIMMICK_SAVE;
- GIMMICK_(*node)->detail_type = (int )save_type;
-
- return ONIG_NORMAL;
-}
-
-static int
-node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,
- int id, ScanEnv* env)
-{
- *node = node_new();
- CHECK_NULL_RETURN_MEMERR(*node);
-
- NODE_SET_TYPE(*node, NODE_GIMMICK);
- GIMMICK_(*node)->id = id;
- GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;
- GIMMICK_(*node)->detail_type = (int )update_var_type;
-
- return ONIG_NORMAL;
-}
-
-static int
-node_new_keep(Node** node, ScanEnv* env)
-{
- int r;
-
- r = node_new_save_gimmick(node, SAVE_KEEP, env);
- if (r != 0) return r;
-
- env->keep_num++;
- return ONIG_NORMAL;
-}
-
-#ifdef USE_CALLOUT
-
-extern void
-onig_free_reg_callout_list(int n, CalloutListEntry* list)
-{
- int i;
- int j;
-
- if (IS_NULL(list)) return ;
-
- for (i = 0; i < n; i++) {
- if (list[i].of == ONIG_CALLOUT_OF_NAME) {
- for (j = 0; j < list[i].u.arg.passed_num; j++) {
- if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {
- if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))
- xfree(list[i].u.arg.vals[j].s.start);
- }
- }
- }
- else { /* ONIG_CALLOUT_OF_CONTENTS */
- if (IS_NOT_NULL(list[i].u.content.start)) {
- xfree((void* )list[i].u.content.start);
- }
- }
- }
-
- xfree(list);
-}
-
-extern CalloutListEntry*
-onig_reg_callout_list_at(regex_t* reg, int num)
-{
- RegexExt* ext = reg->extp;
- CHECK_NULL_RETURN(ext);
-
- if (num <= 0 || num > ext->callout_num)
- return 0;
-
- num--;
- return ext->callout_list + num;
-}
-
-static int
-reg_callout_list_entry(ScanEnv* env, int* rnum)
-{
-#define INIT_CALLOUT_LIST_NUM 3
-
- int num;
- CalloutListEntry* list;
- CalloutListEntry* e;
- RegexExt* ext;
-
- ext = onig_get_regex_ext(env->reg);
- CHECK_NULL_RETURN_MEMERR(ext);
-
- if (IS_NULL(ext->callout_list)) {
- list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);
- CHECK_NULL_RETURN_MEMERR(list);
-
- ext->callout_list = list;
- ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;
- ext->callout_num = 0;
- }
-
- num = ext->callout_num + 1;
- if (num > ext->callout_list_alloc) {
- int alloc = ext->callout_list_alloc * 2;
- list = (CalloutListEntry* )xrealloc(ext->callout_list,
- sizeof(CalloutListEntry) * alloc,
- sizeof(CalloutListEntry) * ext->callout_list_alloc);
- CHECK_NULL_RETURN_MEMERR(list);
-
- ext->callout_list = list;
- ext->callout_list_alloc = alloc;
- }
-
- e = ext->callout_list + (num - 1);
-
- e->flag = 0;
- e->of = 0;
- e->in = ONIG_CALLOUT_OF_CONTENTS;
- e->type = 0;
- e->tag_start = 0;
- e->tag_end = 0;
- e->start_func = 0;
- e->end_func = 0;
- e->u.arg.num = 0;
- e->u.arg.passed_num = 0;
-
- ext->callout_num = num;
- *rnum = num;
- return ONIG_NORMAL;
-}
-
-static int
-node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,
- ScanEnv* env)
-{
- *node = node_new();
- CHECK_NULL_RETURN_MEMERR(*node);
-
- NODE_SET_TYPE(*node, NODE_GIMMICK);
- GIMMICK_(*node)->id = id;
- GIMMICK_(*node)->num = num;
- GIMMICK_(*node)->type = GIMMICK_CALLOUT;
- GIMMICK_(*node)->detail_type = (int )callout_of;
-
- return ONIG_NORMAL;
-}
-#endif
-
-static int
-make_text_segment(Node** node, ScanEnv* env)
-{
- int r;
- int i;
- Node* x;
- Node* ns[2];
-
- /* \X == (?>\O(?:\Y\O)*) */
-
- ns[1] = NULL_NODE;
-
- r = ONIGERR_MEMORY;
- ns[0] = onig_node_new_anchor(ANCR_NO_TEXT_SEGMENT_BOUNDARY, 0);
- if (IS_NULL(ns[0])) goto err;
-
- r = node_new_true_anychar(&ns[1], env);
- if (r != 0) goto err1;
-
- x = make_list(2, ns);
- if (IS_NULL(x)) goto err;
- ns[0] = x;
- ns[1] = NULL_NODE;
-
- x = node_new_quantifier(0, INFINITE_REPEAT, 1);
- if (IS_NULL(x)) goto err;
-
- NODE_BODY(x) = ns[0];
- ns[0] = NULL_NODE;
- ns[1] = x;
-
- r = node_new_true_anychar(&ns[0], env);
- if (r != 0) goto err1;
-
- x = make_list(2, ns);
- if (IS_NULL(x)) goto err;
-
- ns[0] = x;
- ns[1] = NULL_NODE;
-
- x = node_new_bag(BAG_STOP_BACKTRACK);
- if (IS_NULL(x)) goto err;
-
- NODE_BODY(x) = ns[0];
-
- *node = x;
- return ONIG_NORMAL;
-
- err:
- r = ONIGERR_MEMORY;
- err1:
- for (i = 0; i < 2; i++) onig_node_free(ns[i]);
- return r;
-}
-
-static int
-make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
- Node* step_one, int lower, int upper, int possessive,
- int is_range_cutter, ScanEnv* env)
-{
- int r;
- int i;
- int id;
- Node* x;
- Node* ns[4];
-
- for (i = 0; i < 4; i++) ns[i] = NULL_NODE;
-
- ns[1] = absent;
- ns[3] = step_one; /* for err */
- r = node_new_save_gimmick(&ns[0], SAVE_S, env);
- if (r != 0) goto err;
-
- id = GIMMICK_(ns[0])->id;
- r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,
- id, env);
- if (r != 0) goto err;
-
- r = node_new_fail(&ns[3], env);
- if (r != 0) goto err;
-
- x = make_list(4, ns);
- if (IS_NULL(x)) goto err0;
-
- ns[0] = x;
- ns[1] = step_one;
- ns[2] = ns[3] = NULL_NODE;
-
- x = make_alt(2, ns);
- if (IS_NULL(x)) goto err0;
-
- ns[0] = x;
-
- x = node_new_quantifier(lower, upper, 0);
- if (IS_NULL(x)) goto err0;
-
- NODE_BODY(x) = ns[0];
- ns[0] = x;
-
- if (possessive != 0) {
- x = node_new_bag(BAG_STOP_BACKTRACK);
- if (IS_NULL(x)) goto err0;
-
- NODE_BODY(x) = ns[0];
- ns[0] = x;
- }
-
- r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
- pre_save_right_id, env);
- if (r != 0) goto err;
-
- r = node_new_fail(&ns[2], env);
- if (r != 0) goto err;
-
- x = make_list(2, ns + 1);
- if (IS_NULL(x)) goto err0;
-
- ns[1] = x; ns[2] = NULL_NODE;
-
- x = make_alt(2, ns);
- if (IS_NULL(x)) goto err0;
-
- if (is_range_cutter != 0)
- NODE_STATUS_ADD(x, SUPER);
-
- *node = x;
- return ONIG_NORMAL;
-
- err0:
- r = ONIGERR_MEMORY;
- err:
- for (i = 0; i < 4; i++) onig_node_free(ns[i]);
- return r;
-}
-
-static int
-make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,
- ScanEnv* env)
-{
- int r;
- int id;
- Node* save;
- Node* x;
- Node* ns[2];
-
- *node1 = *node2 = NULL_NODE;
- save = ns[0] = ns[1] = NULL_NODE;
-
- r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
- if (r != 0) goto err;
-
- id = GIMMICK_(save)->id;
- r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
- id, env);
- if (r != 0) goto err;
-
- r = node_new_fail(&ns[1], env);
- if (r != 0) goto err;
-
- x = make_list(2, ns);
- if (IS_NULL(x)) goto err0;
-
- ns[0] = NULL_NODE; ns[1] = x;
-
- r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
- pre_save_right_id, env);
- if (r != 0) goto err;
-
- x = make_alt(2, ns);
- if (IS_NULL(x)) goto err0;
-
- *node1 = save;
- *node2 = x;
- return ONIG_NORMAL;
-
- err0:
- r = ONIGERR_MEMORY;
- err:
- onig_node_free(save);
- onig_node_free(ns[0]);
- onig_node_free(ns[1]);
- return r;
-}
-
-static int
-make_range_clear(Node** node, ScanEnv* env)
-{
- int r;
- int id;
- Node* save;
- Node* x;
- Node* ns[2];
-
- *node = NULL_NODE;
- save = ns[0] = ns[1] = NULL_NODE;
-
- r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
- if (r != 0) goto err;
-
- id = GIMMICK_(save)->id;
- r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
- id, env);
- if (r != 0) goto err;
-
- r = node_new_fail(&ns[1], env);
- if (r != 0) goto err;
-
- x = make_list(2, ns);
- if (IS_NULL(x)) goto err0;
-
- ns[0] = NULL_NODE; ns[1] = x;
-
- r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env);
- if (r != 0) goto err;
-
- x = make_alt(2, ns);
- if (IS_NULL(x)) goto err0;
-
- NODE_STATUS_ADD(x, SUPER);
-
- ns[0] = save;
- ns[1] = x;
- save = NULL_NODE;
- x = make_list(2, ns);
- if (IS_NULL(x)) goto err0;
-
- *node = x;
- return ONIG_NORMAL;
-
- err0:
- r = ONIGERR_MEMORY;
- err:
- onig_node_free(save);
- onig_node_free(ns[0]);
- onig_node_free(ns[1]);
- return r;
-}
-
-static int
-is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,
- int* is_possessive, ScanEnv* env)
-{
- Node* quant;
- Node* body;
-
- *rquant = *rbody = 0;
- *is_possessive = 0;
-
- if (NODE_TYPE(node) == NODE_QUANT) {
- quant = node;
- }
- else {
- if (NODE_TYPE(node) == NODE_BAG) {
- BagNode* en = BAG_(node);
- if (en->type == BAG_STOP_BACKTRACK) {
- *is_possessive = 1;
- quant = NODE_BAG_BODY(en);
- if (NODE_TYPE(quant) != NODE_QUANT)
- return 0;
- }
- else
- return 0;
- }
- else
- return 0;
- }
-
- if (QUANT_(quant)->greedy == 0)
- return 0;
-
- body = NODE_BODY(quant);
- switch (NODE_TYPE(body)) {
- case NODE_STRING:
- {
- int len;
- StrNode* sn = STR_(body);
- UChar *s = sn->s;
-
- len = 0;
- while (s < sn->end) {
- s += enclen(env->enc, s);
- len++;
- }
- if (len != 1)
- return 0;
- }
-
- case NODE_CCLASS:
- break;
-
- default:
- return 0;
- break;
- }
-
- if (node != quant) {
- NODE_BODY(node) = 0;
- onig_node_free(node);
- }
- NODE_BODY(quant) = NULL_NODE;
- *rquant = quant;
- *rbody = body;
- return 1;
-}
-
-static int
-make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* quant,
- Node* body, int possessive, ScanEnv* env)
-{
- int r;
- int i;
- int id1;
- int lower, upper;
- Node* x;
- Node* ns[4];
-
- *node = NULL_NODE;
- r = ONIGERR_MEMORY;
- ns[0] = ns[1] = NULL_NODE;
- ns[2] = body, ns[3] = absent;
-
- lower = QUANT_(quant)->lower;
- upper = QUANT_(quant)->upper;
- onig_node_free(quant);
-
- r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
- if (r != 0) goto err;
-
- id1 = GIMMICK_(ns[0])->id;
-
- r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,
- 0, env);
- if (r != 0) goto err;
-
- ns[2] = ns[3] = NULL_NODE;
-
- r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
- id1, env);
- if (r != 0) goto err;
-
- x = make_list(3, ns);
- if (IS_NULL(x)) goto err0;
-
- *node = x;
- return ONIG_NORMAL;
-
- err0:
- r = ONIGERR_MEMORY;
- err:
- for (i = 0; i < 4; i++) onig_node_free(ns[i]);
- return r;
-}
-
-static int
-make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
- ScanEnv* env)
-{
- int r;
- int i;
- int id1, id2;
- int possessive;
- Node* x;
- Node* ns[7];
-
- r = ONIGERR_MEMORY;
- for (i = 0; i < 7; i++) ns[i] = NULL_NODE;
- ns[4] = expr; ns[5] = absent;
-
- if (is_range_cutter == 0) {
- Node* quant;
- Node* body;
-
- if (expr == NULL_NODE) {
- /* default expr \O* */
- quant = node_new_quantifier(0, INFINITE_REPEAT, 0);
- if (IS_NULL(quant)) goto err0;
-
- r = node_new_true_anychar(&body, env);
- if (r != 0) {
- onig_node_free(quant);
- goto err;
- }
- possessive = 0;
- goto simple;
- }
- else {
- if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {
- simple:
- r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,
- body, possessive, env);
- if (r != 0) {
- ns[4] = NULL_NODE;
- onig_node_free(quant);
- onig_node_free(body);
- goto err;
- }
-
- return ONIG_NORMAL;
- }
- }
- }
-
- r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
- if (r != 0) goto err;
-
- id1 = GIMMICK_(ns[0])->id;
-
- r = node_new_save_gimmick(&ns[1], SAVE_S, env);
- if (r != 0) goto err;
-
- id2 = GIMMICK_(ns[1])->id;
-
- r = node_new_true_anychar(&ns[3], env);
- if (r != 0) goto err;
-
- possessive = 1;
- r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, INFINITE_REPEAT,
- possessive, is_range_cutter, env);
- if (r != 0) goto err;
-
- ns[3] = NULL_NODE;
- ns[5] = NULL_NODE;
-
- r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);
- if (r != 0) goto err;
-
- if (is_range_cutter != 0) {
- x = make_list(4, ns);
- if (IS_NULL(x)) goto err0;
- }
- else {
- r = make_absent_tail(&ns[5], &ns[6], id1, env);
- if (r != 0) goto err;
-
- x = make_list(7, ns);
- if (IS_NULL(x)) goto err0;
- }
-
- *node = x;
- return ONIG_NORMAL;
-
- err0:
- r = ONIGERR_MEMORY;
- err:
- for (i = 0; i < 7; i++) onig_node_free(ns[i]);
- return r;
-}
-
-extern int
-onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
-{
- int addlen = (int )(end - s);
-
- if (addlen > 0) {
- int len = (int )(STR_(node)->end - STR_(node)->s);
-
- if (STR_(node)->capacity > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) {
- UChar* p;
- int capa = len + addlen + NODE_STRING_MARGIN;
-
- if (capa <= STR_(node)->capacity) {
- onig_strcpy(STR_(node)->s + len, s, end);
- }
- else {
- if (STR_(node)->s == STR_(node)->buf)
- p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,
- s, end, capa);
- else
- p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa, STR_(node)->capacity);
-
- CHECK_NULL_RETURN_MEMERR(p);
- STR_(node)->s = p;
- STR_(node)->capacity = capa;
- }
- }
- else {
- onig_strcpy(STR_(node)->s + len, s, end);
- }
- STR_(node)->end = STR_(node)->s + len + addlen;
- }
-
- return 0;
-}
-
-extern int
-onig_node_str_set(Node* node, const UChar* s, const UChar* end)
-{
- onig_node_str_clear(node);
- return onig_node_str_cat(node, s, end);
-}
-
-static int
-node_str_cat_char(Node* node, UChar c)
-{
- UChar s[1];
-
- s[0] = c;
- return onig_node_str_cat(node, s, s + 1);
-}
-
-extern void
-onig_node_conv_to_str_node(Node* node, int flag)
-{
- NODE_SET_TYPE(node, NODE_STRING);
- STR_(node)->flag = flag;
- STR_(node)->capacity = 0;
- STR_(node)->s = STR_(node)->buf;
- STR_(node)->end = STR_(node)->buf;
-}
-
-extern void
-onig_node_str_clear(Node* node)
-{
- if (STR_(node)->capacity != 0 &&
- IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
- xfree(STR_(node)->s);
- }
-
- STR_(node)->capacity = 0;
- STR_(node)->flag = 0;
- STR_(node)->s = STR_(node)->buf;
- STR_(node)->end = STR_(node)->buf;
-}
-
-static Node*
-node_new_str(const UChar* s, const UChar* end)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- NODE_SET_TYPE(node, NODE_STRING);
- STR_(node)->capacity = 0;
- STR_(node)->flag = 0;
- STR_(node)->s = STR_(node)->buf;
- STR_(node)->end = STR_(node)->buf;
- if (onig_node_str_cat(node, s, end)) {
- onig_node_free(node);
- return NULL;
- }
- return node;
-}
-
-extern Node*
-onig_node_new_str(const UChar* s, const UChar* end)
-{
- return node_new_str(s, end);
-}
-
-static Node*
-node_new_str_raw(UChar* s, UChar* end)
-{
- Node* node = node_new_str(s, end);
- CHECK_NULL_RETURN(node);
- NODE_STRING_SET_RAW(node);
- return node;
-}
-
-static Node*
-node_new_empty(void)
-{
- return node_new_str(NULL, NULL);
-}
-
-static Node*
-node_new_str_raw_char(UChar c)
-{
- int i;
- UChar p[1];
- Node* node;
-
- p[0] = c;
- node = node_new_str_raw(p, p + 1);
- CHECK_NULL_RETURN(node);
-
- /* clear buf tail */
- for (i = 1; i < NODE_STRING_BUF_SIZE; i++)
- STR_(node)->buf[i] = '\0';
-
- return node;
-}
-
-static Node*
-str_node_split_last_char(Node* node, OnigEncoding enc)
-{
- const UChar *p;
- Node* rn;
- StrNode* sn;
-
- sn = STR_(node);
- rn = NULL_NODE;
- if (sn->end > sn->s) {
- p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
- if (p && p > sn->s) { /* can be split. */
- rn = node_new_str(p, sn->end);
- CHECK_NULL_RETURN(rn);
- if (NODE_STRING_IS_RAW(node))
- NODE_STRING_SET_RAW(rn);
-
- sn->end = (UChar* )p;
- }
- }
- return rn;
-}
-
-static int
-str_node_can_be_split(Node* node, OnigEncoding enc)
-{
- StrNode* sn = STR_(node);
- if (sn->end > sn->s) {
- return ((enclen(enc, sn->s) < sn->end - sn->s) ? 1 : 0);
- }
- return 0;
-}
-
-extern int
-onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
-{
- unsigned int num, val;
- OnigCodePoint c;
- UChar* p = *src;
- PFETCH_READY;
-
- num = 0;
- while (! PEND) {
- PFETCH(c);
- if (IS_CODE_DIGIT_ASCII(enc, c)) {
- val = (unsigned int )DIGITVAL(c);
- if ((INT_MAX_LIMIT - val) / 10UL < num)
- return -1; /* overflow */
-
- num = num * 10 + val;
- }
- else {
- PUNFETCH;
- break;
- }
- }
- *src = p;
- return num;
-}
-
-static int
-scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,
- int maxlen, OnigEncoding enc)
-{
- OnigCodePoint c;
- unsigned int num, val;
- int n;
- UChar* p = *src;
- PFETCH_READY;
-
- num = 0;
- n = 0;
- while (! PEND && n < maxlen) {
- PFETCH(c);
- if (IS_CODE_XDIGIT_ASCII(enc, c)) {
- n++;
- val = (unsigned int )XDIGITVAL(enc,c);
- if ((INT_MAX_LIMIT - val) / 16UL < num)
- return ONIGERR_TOO_BIG_NUMBER; /* overflow */
-
- num = (num << 4) + XDIGITVAL(enc,c);
- }
- else {
- PUNFETCH;
- break;
- }
- }
-
- if (n < minlen)
- return ONIGERR_INVALID_CODE_POINT_VALUE;
-
- *src = p;
- return num;
-}
-
-static int
-scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
- OnigEncoding enc)
-{
- OnigCodePoint c;
- unsigned int num, val;
- UChar* p = *src;
- PFETCH_READY;
-
- num = 0;
- while (! PEND && maxlen-- != 0) {
- PFETCH(c);
- if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {
- val = ODIGITVAL(c);
- if ((INT_MAX_LIMIT - val) / 8UL < num)
- return -1; /* overflow */
-
- num = (num << 3) + val;
- }
- else {
- PUNFETCH;
- break;
- }
- }
- *src = p;
- return num;
-}
-
-
-#define BB_WRITE_CODE_POINT(bbuf,pos,code) \
- BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
-
-/* data format:
- [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
- (all data size is OnigCodePoint)
- */
-static int
-new_code_range(BBuf** pbuf)
-{
-#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
- int r;
- OnigCodePoint n;
- BBuf* bbuf;
-
- bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
- CHECK_NULL_RETURN_MEMERR(bbuf);
- r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);
- if (r != 0) {
- xfree(bbuf);
- *pbuf = 0;
- return r;
- }
-
- n = 0;
- BB_WRITE_CODE_POINT(bbuf, 0, n);
- return 0;
-}
-
-static int
-add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
-{
- int r, inc_n, pos;
- int low, high, bound, x;
- OnigCodePoint n, *data;
- BBuf* bbuf;
-
- if (from > to) {
- n = from; from = to; to = n;
- }
-
- if (IS_NULL(*pbuf)) {
- r = new_code_range(pbuf);
- if (r != 0) return r;
- bbuf = *pbuf;
- n = 0;
- }
- else {
- bbuf = *pbuf;
- GET_CODE_POINT(n, bbuf->p);
- }
- data = (OnigCodePoint* )(bbuf->p);
- data++;
-
- for (low = 0, bound = n; low < bound; ) {
- x = (low + bound) >> 1;
- if (from > data[x*2 + 1])
- low = x + 1;
- else
- bound = x;
- }
-
- high = (to == ~((OnigCodePoint )0)) ? n : low;
- for (bound = n; high < bound; ) {
- x = (high + bound) >> 1;
- if (to + 1 >= data[x*2])
- high = x + 1;
- else
- bound = x;
- }
-
- inc_n = low + 1 - high;
- if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
- return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
-
- if (inc_n != 1) {
- if (from > data[low*2])
- from = data[low*2];
- if (to < data[(high - 1)*2 + 1])
- to = data[(high - 1)*2 + 1];
- }
-
- if (inc_n != 0 && (OnigCodePoint )high < n) {
- int from_pos = SIZE_CODE_POINT * (1 + high * 2);
- int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
- int size = (n - high) * 2 * SIZE_CODE_POINT;
-
- if (inc_n > 0) {
- BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
- }
- else {
- BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
- }
- }
-
- pos = SIZE_CODE_POINT * (1 + low * 2);
- BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
- BB_WRITE_CODE_POINT(bbuf, pos, from);
- BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
- n += inc_n;
- BB_WRITE_CODE_POINT(bbuf, 0, n);
-
- return 0;
-}
-
-static int
-add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
-{
- if (from > to) {
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
- return 0;
- else
- return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
- }
-
- return add_code_range_to_buf(pbuf, from, to);
-}
-
-static int
-not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
-{
- int r, i, n;
- OnigCodePoint pre, from, *data, to = 0;
-
- *pbuf = (BBuf* )NULL;
- if (IS_NULL(bbuf)) {
- set_all:
- return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
- }
-
- data = (OnigCodePoint* )(bbuf->p);
- GET_CODE_POINT(n, data);
- data++;
- if (n <= 0) goto set_all;
-
- r = 0;
- pre = MBCODE_START_POS(enc);
- for (i = 0; i < n; i++) {
- from = data[i*2];
- to = data[i*2+1];
- if (pre <= from - 1) {
- r = add_code_range_to_buf(pbuf, pre, from - 1);
- if (r != 0) return r;
- }
- if (to == ~((OnigCodePoint )0)) break;
- pre = to + 1;
- }
- if (to < ~((OnigCodePoint )0)) {
- r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
- }
- return r;
-}
-
-#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
- BBuf *tbuf; \
- int tnot; \
- tnot = not1; not1 = not2; not2 = tnot; \
- tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
-} while (0)
-
-static int
-or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
- BBuf* bbuf2, int not2, BBuf** pbuf)
-{
- int r;
- OnigCodePoint i, n1, *data1;
- OnigCodePoint from, to;
-
- *pbuf = (BBuf* )NULL;
- if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
- if (not1 != 0 || not2 != 0)
- return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
- return 0;
- }
-
- r = 0;
- if (IS_NULL(bbuf2))
- SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
-
- if (IS_NULL(bbuf1)) {
- if (not1 != 0) {
- return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
- }
- else {
- if (not2 == 0) {
- return bbuf_clone(pbuf, bbuf2);
- }
- else {
- return not_code_range_buf(enc, bbuf2, pbuf);
- }
- }
- }
-
- if (not1 != 0)
- SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
-
- data1 = (OnigCodePoint* )(bbuf1->p);
- GET_CODE_POINT(n1, data1);
- data1++;
-
- if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
- r = bbuf_clone(pbuf, bbuf2);
- }
- else if (not1 == 0) { /* 1 OR (not 2) */
- r = not_code_range_buf(enc, bbuf2, pbuf);
- }
- if (r != 0) return r;
-
- for (i = 0; i < n1; i++) {
- from = data1[i*2];
- to = data1[i*2+1];
- r = add_code_range_to_buf(pbuf, from, to);
- if (r != 0) return r;
- }
- return 0;
-}
-
-static int
-and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
- OnigCodePoint* data, int n)
-{
- int i, r;
- OnigCodePoint from2, to2;
-
- for (i = 0; i < n; i++) {
- from2 = data[i*2];
- to2 = data[i*2+1];
- if (from2 < from1) {
- if (to2 < from1) continue;
- else {
- from1 = to2 + 1;
- }
- }
- else if (from2 <= to1) {
- if (to2 < to1) {
- if (from1 <= from2 - 1) {
- r = add_code_range_to_buf(pbuf, from1, from2-1);
- if (r != 0) return r;
- }
- from1 = to2 + 1;
- }
- else {
- to1 = from2 - 1;
- }
- }
- else {
- from1 = from2;
- }
- if (from1 > to1) break;
- }
- if (from1 <= to1) {
- r = add_code_range_to_buf(pbuf, from1, to1);
- if (r != 0) return r;
- }
- return 0;
-}
-
-static int
-and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
-{
- int r;
- OnigCodePoint i, j, n1, n2, *data1, *data2;
- OnigCodePoint from, to, from1, to1, from2, to2;
-
- *pbuf = (BBuf* )NULL;
- if (IS_NULL(bbuf1)) {
- if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
- return bbuf_clone(pbuf, bbuf2);
- return 0;
- }
- else if (IS_NULL(bbuf2)) {
- if (not2 != 0)
- return bbuf_clone(pbuf, bbuf1);
- return 0;
- }
-
- if (not1 != 0)
- SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
-
- data1 = (OnigCodePoint* )(bbuf1->p);
- data2 = (OnigCodePoint* )(bbuf2->p);
- GET_CODE_POINT(n1, data1);
- GET_CODE_POINT(n2, data2);
- data1++;
- data2++;
-
- if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
- for (i = 0; i < n1; i++) {
- from1 = data1[i*2];
- to1 = data1[i*2+1];
- for (j = 0; j < n2; j++) {
- from2 = data2[j*2];
- to2 = data2[j*2+1];
- if (from2 > to1) break;
- if (to2 < from1) continue;
- from = MAX(from1, from2);
- to = MIN(to1, to2);
- r = add_code_range_to_buf(pbuf, from, to);
- if (r != 0) return r;
- }
- }
- }
- else if (not1 == 0) { /* 1 AND (not 2) */
- for (i = 0; i < n1; i++) {
- from1 = data1[i*2];
- to1 = data1[i*2+1];
- r = and_code_range1(pbuf, from1, to1, data2, n2);
- if (r != 0) return r;
- }
- }
-
- return 0;
-}
-
-static int
-and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
-{
- int r, not1, not2;
- BBuf *buf1, *buf2, *pbuf;
- BitSetRef bsr1, bsr2;
- BitSet bs1, bs2;
-
- not1 = IS_NCCLASS_NOT(dest);
- bsr1 = dest->bs;
- buf1 = dest->mbuf;
- not2 = IS_NCCLASS_NOT(cc);
- bsr2 = cc->bs;
- buf2 = cc->mbuf;
-
- if (not1 != 0) {
- bitset_invert_to(bsr1, bs1);
- bsr1 = bs1;
- }
- if (not2 != 0) {
- bitset_invert_to(bsr2, bs2);
- bsr2 = bs2;
- }
- bitset_and(bsr1, bsr2);
- if (bsr1 != dest->bs) {
- bitset_copy(dest->bs, bsr1);
- }
- if (not1 != 0) {
- bitset_invert(dest->bs);
- }
-
- if (! ONIGENC_IS_SINGLEBYTE(enc)) {
- if (not1 != 0 && not2 != 0) {
- r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
- }
- else {
- r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
- if (r == 0 && not1 != 0) {
- BBuf *tbuf;
- r = not_code_range_buf(enc, pbuf, &tbuf);
- if (r != 0) {
- bbuf_free(pbuf);
- return r;
- }
- bbuf_free(pbuf);
- pbuf = tbuf;
- }
- }
- if (r != 0) return r;
-
- dest->mbuf = pbuf;
- bbuf_free(buf1);
- return r;
- }
- return 0;
-}
-
-static int
-or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
-{
- int r, not1, not2;
- BBuf *buf1, *buf2, *pbuf;
- BitSetRef bsr1, bsr2;
- BitSet bs1, bs2;
-
- not1 = IS_NCCLASS_NOT(dest);
- bsr1 = dest->bs;
- buf1 = dest->mbuf;
- not2 = IS_NCCLASS_NOT(cc);
- bsr2 = cc->bs;
- buf2 = cc->mbuf;
-
- if (not1 != 0) {
- bitset_invert_to(bsr1, bs1);
- bsr1 = bs1;
- }
- if (not2 != 0) {
- bitset_invert_to(bsr2, bs2);
- bsr2 = bs2;
- }
- bitset_or(bsr1, bsr2);
- if (bsr1 != dest->bs) {
- bitset_copy(dest->bs, bsr1);
- }
- if (not1 != 0) {
- bitset_invert(dest->bs);
- }
-
- if (! ONIGENC_IS_SINGLEBYTE(enc)) {
- if (not1 != 0 && not2 != 0) {
- r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
- }
- else {
- r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
- if (r == 0 && not1 != 0) {
- BBuf *tbuf;
- r = not_code_range_buf(enc, pbuf, &tbuf);
- if (r != 0) {
- bbuf_free(pbuf);
- return r;
- }
- bbuf_free(pbuf);
- pbuf = tbuf;
- }
- }
- if (r != 0) return r;
-
- dest->mbuf = pbuf;
- bbuf_free(buf1);
- return r;
- }
- else
- return 0;
-}
-
-static OnigCodePoint
-conv_backslash_value(OnigCodePoint c, ScanEnv* env)
-{
- if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
- switch (c) {
- case 'n': return '\n';
- case 't': return '\t';
- case 'r': return '\r';
- case 'f': return '\f';
- case 'a': return '\007';
- case 'b': return '\010';
- case 'e': return '\033';
- case 'v':
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
- return '\v';
- break;
-
- default:
- break;
- }
- }
- return c;
-}
-
-static int
-is_invalid_quantifier_target(Node* node)
-{
- switch (NODE_TYPE(node)) {
- case NODE_ANCHOR:
- case NODE_GIMMICK:
- return 1;
- break;
-
- case NODE_BAG:
- /* allow enclosed elements */
- /* return is_invalid_quantifier_target(NODE_BODY(node)); */
- break;
-
- case NODE_LIST:
- do {
- if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0;
- } while (IS_NOT_NULL(node = NODE_CDR(node)));
- return 0;
- break;
-
- case NODE_ALT:
- do {
- if (is_invalid_quantifier_target(NODE_CAR(node))) return 1;
- } while (IS_NOT_NULL(node = NODE_CDR(node)));
- break;
-
- default:
- break;
- }
- return 0;
-}
-
-/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
-static int
-quantifier_type_num(QuantNode* q)
-{
- if (q->greedy) {
- if (q->lower == 0) {
- if (q->upper == 1) return 0;
- else if (IS_INFINITE_REPEAT(q->upper)) return 1;
- }
- else if (q->lower == 1) {
- if (IS_INFINITE_REPEAT(q->upper)) return 2;
- }
- }
- else {
- if (q->lower == 0) {
- if (q->upper == 1) return 3;
- else if (IS_INFINITE_REPEAT(q->upper)) return 4;
- }
- else if (q->lower == 1) {
- if (IS_INFINITE_REPEAT(q->upper)) return 5;
- }
- }
- return -1;
-}
-
-
-enum ReduceType {
- RQ_ASIS = 0, /* as is */
- RQ_DEL
This message has been truncated.

Join devel@edk2.groups.io to automatically receive all group messages.