tpy-lang 0.3.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tpy_lang-0.3.0.dev0.dist-info/METADATA +151 -0
- tpy_lang-0.3.0.dev0.dist-info/RECORD +333 -0
- tpy_lang-0.3.0.dev0.dist-info/WHEEL +4 -0
- tpy_lang-0.3.0.dev0.dist-info/entry_points.txt +3 -0
- tpyc/__init__.py +104 -0
- tpyc/__main__.py +6 -0
- tpyc/_buildinfo.py +1 -0
- tpyc/_data/docs/LANGUAGE_FEATURES.md +6278 -0
- tpyc/_data/docs/STDLIB_ROADMAP.md +1258 -0
- tpyc/_data/docs/TPY_FOR_AGENTS.md +556 -0
- tpyc/_data/lib/tpy/_bindings/__init__.py +6 -0
- tpyc/_data/lib/tpy/_bindings/pcre2.py +173 -0
- tpyc/_data/lib/tpy/_bindings/posix_socket.py +161 -0
- tpyc/_data/lib/tpy/_functools_macros.py +80 -0
- tpyc/_data/lib/tpy/_macro_helpers.py +161 -0
- tpyc/_data/lib/tpy/argparse.py +2062 -0
- tpyc/_data/lib/tpy/asyncio/__init__.py +744 -0
- tpyc/_data/lib/tpy/asyncio/_executor.py +515 -0
- tpyc/_data/lib/tpy/base64.py +410 -0
- tpyc/_data/lib/tpy/bisect.py +39 -0
- tpyc/_data/lib/tpy/builtins.py +38 -0
- tpyc/_data/lib/tpy/dataclasses.py +354 -0
- tpyc/_data/lib/tpy/enum.py +23 -0
- tpyc/_data/lib/tpy/functools.py +33 -0
- tpyc/_data/lib/tpy/hashlib.py +206 -0
- tpyc/_data/lib/tpy/heapq.py +118 -0
- tpyc/_data/lib/tpy/io.py +395 -0
- tpyc/_data/lib/tpy/json.py +221 -0
- tpyc/_data/lib/tpy/math.py +406 -0
- tpyc/_data/lib/tpy/random.py +597 -0
- tpyc/_data/lib/tpy/re.py +467 -0
- tpyc/_data/lib/tpy/socket.py +379 -0
- tpyc/_data/lib/tpy/struct.py +178 -0
- tpyc/_data/lib/tpy/sys.py +40 -0
- tpyc/_data/lib/tpy/time.py +39 -0
- tpyc/_data/lib/tpy/tpy/__init__.py +78 -0
- tpyc/_data/lib/tpy/tpy/_bootstrap/__init__.py +10 -0
- tpyc/_data/lib/tpy/tpy/_bootstrap/_decorators.py +37 -0
- tpyc/_data/lib/tpy/tpy/_bootstrap/_extern.py +64 -0
- tpyc/_data/lib/tpy/tpy/_builtins/__init__.py +11 -0
- tpyc/_data/lib/tpy/tpy/_builtins/_bytes.py +378 -0
- tpyc/_data/lib/tpy/tpy/_builtins/_dict.py +151 -0
- tpyc/_data/lib/tpy/tpy/_builtins/_exceptions.py +125 -0
- tpyc/_data/lib/tpy/tpy/_builtins/_funcs.py +681 -0
- tpyc/_data/lib/tpy/tpy/_builtins/_io.py +97 -0
- tpyc/_data/lib/tpy/tpy/_builtins/_list.py +127 -0
- tpyc/_data/lib/tpy/tpy/_builtins/_range.py +52 -0
- tpyc/_data/lib/tpy/tpy/_builtins/_set.py +139 -0
- tpyc/_data/lib/tpy/tpy/_builtins/_super.py +11 -0
- tpyc/_data/lib/tpy/tpy/_builtins/_types.py +661 -0
- tpyc/_data/lib/tpy/tpy/_core/__init__.py +23 -0
- tpyc/_data/lib/tpy/tpy/_core/_bytes_view.py +129 -0
- tpyc/_data/lib/tpy/tpy/_core/_containers.py +137 -0
- tpyc/_data/lib/tpy/tpy/_core/_functions.py +40 -0
- tpyc/_data/lib/tpy/tpy/_core/_types.py +2061 -0
- tpyc/_data/lib/tpy/tpy/_typing/__init__.py +77 -0
- tpyc/_data/lib/tpy/tpy/_version.py +29 -0
- tpyc/_data/lib/tpy/tpy/bits.py +28 -0
- tpyc/_data/lib/tpy/tpy/coro/__init__.py +127 -0
- tpyc/_data/lib/tpy/tpy/extern.py +8 -0
- tpyc/_data/lib/tpy/tpy/mem.py +49 -0
- tpyc/_data/lib/tpy/tpy/unsafe.py +195 -0
- tpyc/_data/lib/tpy/tpy/version.py +21 -0
- tpyc/_data/lib/tpy/typing.py +13 -0
- tpyc/_data/runtime/cpp/include/tpy/any.hpp +461 -0
- tpyc/_data/runtime/cpp/include/tpy/as_ostream.hpp +117 -0
- tpyc/_data/runtime/cpp/include/tpy/async.hpp +76 -0
- tpyc/_data/runtime/cpp/include/tpy/bigint.hpp +1343 -0
- tpyc/_data/runtime/cpp/include/tpy/builtins.hpp +400 -0
- tpyc/_data/runtime/cpp/include/tpy/bytes_ops.hpp +469 -0
- tpyc/_data/runtime/cpp/include/tpy/container_ops.hpp +487 -0
- tpyc/_data/runtime/cpp/include/tpy/copy_iter.hpp +82 -0
- tpyc/_data/runtime/cpp/include/tpy/core.hpp +558 -0
- tpyc/_data/runtime/cpp/include/tpy/dict_ops.hpp +289 -0
- tpyc/_data/runtime/cpp/include/tpy/dunder.hpp +750 -0
- tpyc/_data/runtime/cpp/include/tpy/dynamic.hpp +44 -0
- tpyc/_data/runtime/cpp/include/tpy/enum.hpp +40 -0
- tpyc/_data/runtime/cpp/include/tpy/file.hpp +245 -0
- tpyc/_data/runtime/cpp/include/tpy/fixed_int.hpp +317 -0
- tpyc/_data/runtime/cpp/include/tpy/format.hpp +954 -0
- tpyc/_data/runtime/cpp/include/tpy/frame_slot.hpp +120 -0
- tpyc/_data/runtime/cpp/include/tpy/generator.hpp +47 -0
- tpyc/_data/runtime/cpp/include/tpy/iterable_ops.hpp +122 -0
- tpyc/_data/runtime/cpp/include/tpy/itertools.hpp +749 -0
- tpyc/_data/runtime/cpp/include/tpy/next_iter.hpp +82 -0
- tpyc/_data/runtime/cpp/include/tpy/ordered_map.hpp +518 -0
- tpyc/_data/runtime/cpp/include/tpy/ordered_set.hpp +337 -0
- tpyc/_data/runtime/cpp/include/tpy/own_iter.hpp +54 -0
- tpyc/_data/runtime/cpp/include/tpy/pascal_graph_sdl.hpp +192 -0
- tpyc/_data/runtime/cpp/include/tpy/printing.hpp +302 -0
- tpyc/_data/runtime/cpp/include/tpy/protocols.hpp +61 -0
- tpyc/_data/runtime/cpp/include/tpy/range.hpp +115 -0
- tpyc/_data/runtime/cpp/include/tpy/ranges.hpp +212 -0
- tpyc/_data/runtime/cpp/include/tpy/set_ops.hpp +265 -0
- tpyc/_data/runtime/cpp/include/tpy/slice.hpp +47 -0
- tpyc/_data/runtime/cpp/include/tpy/span_iter.hpp +42 -0
- tpyc/_data/runtime/cpp/include/tpy/stdlib/math.hpp +41 -0
- tpyc/_data/runtime/cpp/include/tpy/stdlib/pcre2_h.hpp +96 -0
- tpyc/_data/runtime/cpp/include/tpy/stdlib/random.hpp +25 -0
- tpyc/_data/runtime/cpp/include/tpy/stdlib/socket_h.hpp +145 -0
- tpyc/_data/runtime/cpp/include/tpy/stdlib/time.hpp +62 -0
- tpyc/_data/runtime/cpp/include/tpy/system.hpp +121 -0
- tpyc/_data/runtime/cpp/include/tpy/throwable.hpp +55 -0
- tpyc/_data/runtime/cpp/include/tpy/tpy.hpp +156 -0
- tpyc/_data/runtime/cpp/include/tpy/type_name.hpp +77 -0
- tpyc/_data/runtime/cpp/include/tpy/type_traits.hpp +240 -0
- tpyc/_data/runtime/cpp/include/tpy/uninit_array_storage.hpp +250 -0
- tpyc/_data/runtime/cpp/include/tpy/uninit_heap_storage.hpp +277 -0
- tpyc/_data/runtime/cpp/include/tpy/varargs.hpp +174 -0
- tpyc/_data/runtime/cpp/include/tpy/variant_ref.hpp +118 -0
- tpyc/_data/runtime/cpp/src/stdlib/socket_impl.cpp +104 -0
- tpyc/_data/runtime/cpp/third_party/README.md +58 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/AUTHORS +36 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/CMakeLists.txt +1233 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/COPYING +5 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/ChangeLog +3097 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/HACKING +853 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/INSTALL +368 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/LICENCE +94 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/NEWS +492 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/NON-AUTOTOOLS-BUILD +430 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/README +956 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/cmake/COPYING-CMAKE-SCRIPTS +22 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/cmake/FindEditline.cmake +16 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/cmake/FindPackageHandleStandardArgs.cmake +58 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/cmake/FindReadline.cmake +29 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/cmake/pcre2-config-version.cmake.in +15 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/cmake/pcre2-config.cmake.in +148 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/config-cmake.h.in +56 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/libpcre2-16.pc.in +13 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/libpcre2-32.pc.in +13 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/libpcre2-8.pc.in +13 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/libpcre2-posix.pc.in +13 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/pcre2-config.in +121 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/config.h +483 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/config.h.generic +483 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/config.h.in +460 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2.h +1010 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2.h.generic +1010 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2.h.in +1010 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_auto_possess.c +1371 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_chartables.c +196 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_chartables.c.dist +196 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_chkdint.c +96 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_compile.c +11001 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_config.c +252 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_context.c +510 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_convert.c +1189 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_dfa_match.c +4119 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_dftables.c +297 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_error.c +345 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_extuni.c +162 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_find_bracket.c +219 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_fuzzsupport.c +792 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_internal.h +2084 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_intmodedep.h +940 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_compile.c +14972 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_match.c +200 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_misc.c +234 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_neon_inc.h +354 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_simd_inc.h +2355 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_test.c +2528 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_maketables.c +165 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_match.c +7777 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_match_data.c +185 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_newline.c +243 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_ord2utf.c +120 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_pattern_info.c +432 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_printint.c +886 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_script_run.c +344 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_serialize.c +286 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_string_utils.c +237 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_study.c +1915 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_substitute.c +1009 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_substring.c +550 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_tables.c +234 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_ucd.c +5460 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_ucp.h +396 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_ucptables.c +1533 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_valid_utf.c +398 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_xclass.c +308 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2demo.c +497 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2grep.c +4606 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2posix.c +425 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2posix.h +187 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2posix_test.c +209 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2test.c +9708 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitExecAllocatorApple.c +137 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitExecAllocatorCore.c +327 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c +89 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitExecAllocatorPosix.c +62 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitExecAllocatorWindows.c +40 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c +72 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorPosix.c +172 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorPosix.c +141 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorWindows.c +102 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitConfig.h +142 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitConfigCPU.h +188 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitConfigInternal.h +907 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitLir.c +3561 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitLir.h +2466 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeARM_32.c +4636 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeARM_64.c +3491 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeARM_T2_32.c +4302 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeLOONGARCH_64.c +3765 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeMIPS_32.c +472 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeMIPS_64.c +387 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeMIPS_common.c +4259 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativePPC_32.c +485 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativePPC_64.c +719 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativePPC_common.c +3161 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeRISCV_32.c +142 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeRISCV_64.c +222 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeRISCV_common.c +3121 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeS390X.c +4526 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeX86_32.c +1685 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeX86_64.c +1398 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeX86_common.c +5001 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitSerialize.c +516 -0
- tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitUtils.c +344 -0
- tpyc/_data/runtime/cpp/third_party/pcre2.sources.txt +54 -0
- tpyc/_data/runtime/cpp/third_party/pcre2.vendor.json +7 -0
- tpyc/build/__init__.py +7 -0
- tpyc/build/pcre2.py +122 -0
- tpyc/build/third_party.py +413 -0
- tpyc/cli.py +822 -0
- tpyc/codegen_cpp/__init__.py +18 -0
- tpyc/codegen_cpp/builtins.py +484 -0
- tpyc/codegen_cpp/context.py +2064 -0
- tpyc/codegen_cpp/expressions.py +5940 -0
- tpyc/codegen_cpp/functions.py +1913 -0
- tpyc/codegen_cpp/gen_async.py +3258 -0
- tpyc/codegen_cpp/gen_generators.py +657 -0
- tpyc/codegen_cpp/generator.py +2258 -0
- tpyc/codegen_cpp/match.py +1997 -0
- tpyc/codegen_cpp/param_const.py +172 -0
- tpyc/codegen_cpp/protocols.py +907 -0
- tpyc/codegen_cpp/records.py +1654 -0
- tpyc/codegen_cpp/resumable_cfg.py +1651 -0
- tpyc/codegen_cpp/statements.py +4963 -0
- tpyc/codegen_cpp/string_dispatch.py +76 -0
- tpyc/codegen_cpp/test_context.py +46 -0
- tpyc/codegen_cpp/test_param_const.py +113 -0
- tpyc/codegen_cpp/test_resumable_cfg.py +182 -0
- tpyc/codegen_cpp/type_resolution.py +53 -0
- tpyc/codegen_cpp/types.py +436 -0
- tpyc/codegen_cpp/variant_access.py +135 -0
- tpyc/coercions.py +749 -0
- tpyc/compilation_context.py +57 -0
- tpyc/compiler.py +3945 -0
- tpyc/cycle_detection.py +358 -0
- tpyc/diagnostics.py +135 -0
- tpyc/dump_types.py +353 -0
- tpyc/frontend_diagnostics.py +47 -0
- tpyc/frontend_ir/__init__.py +140 -0
- tpyc/frontend_ir/lower.py +1098 -0
- tpyc/frontend_ir/nodes.py +718 -0
- tpyc/frontend_ir/resolver_adapter.py +151 -0
- tpyc/frontend_plugin.py +209 -0
- tpyc/install_docs.py +81 -0
- tpyc/liveness.py +756 -0
- tpyc/macro_api.py +1724 -0
- tpyc/macro_loader.py +497 -0
- tpyc/module_names.py +64 -0
- tpyc/modules/__init__.py +31 -0
- tpyc/modules/defs.py +89 -0
- tpyc/modules/registry.py +36 -0
- tpyc/modules/resolver.py +192 -0
- tpyc/modules/type_resolution.py +629 -0
- tpyc/namespace.py +172 -0
- tpyc/parse/__init__.py +84 -0
- tpyc/parse/imports.py +490 -0
- tpyc/parse/nodes.py +1732 -0
- tpyc/parse/parser.py +4043 -0
- tpyc/parse/resolve_refs.py +466 -0
- tpyc/parse/type_resolver.py +1060 -0
- tpyc/prescan.py +254 -0
- tpyc/qnames.py +149 -0
- tpyc/repl.py +529 -0
- tpyc/repl_backends.py +848 -0
- tpyc/sema/__init__.py +21 -0
- tpyc/sema/analyzer.py +3625 -0
- tpyc/sema/bound_check.py +72 -0
- tpyc/sema/builder_trace.py +684 -0
- tpyc/sema/calls.py +5406 -0
- tpyc/sema/compatibility.py +2107 -0
- tpyc/sema/context.py +1243 -0
- tpyc/sema/expressions.py +3737 -0
- tpyc/sema/flow_facts.py +199 -0
- tpyc/sema/init_tracker.py +150 -0
- tpyc/sema/list_literals.py +69 -0
- tpyc/sema/literal_utils.py +27 -0
- tpyc/sema/local_deduction.py +1088 -0
- tpyc/sema/macros.py +179 -0
- tpyc/sema/match.py +1177 -0
- tpyc/sema/method_expansion.py +347 -0
- tpyc/sema/methods.py +2197 -0
- tpyc/sema/mutation_propagation.py +268 -0
- tpyc/sema/narrowing.py +857 -0
- tpyc/sema/numeric_lattice.py +160 -0
- tpyc/sema/operators.py +402 -0
- tpyc/sema/overloads.py +841 -0
- tpyc/sema/protocols.py +1209 -0
- tpyc/sema/reach_analysis.py +202 -0
- tpyc/sema/registration.py +3156 -0
- tpyc/sema/scope_tracker.py +193 -0
- tpyc/sema/statements.py +4426 -0
- tpyc/sema/type_ops.py +1879 -0
- tpyc/sema/value_range.py +181 -0
- tpyc/symbol_binding.py +259 -0
- tpyc/test_c3_mro.py +208 -0
- tpyc/test_cli_argv.py +52 -0
- tpyc/test_compiler.py +559 -0
- tpyc/test_contains_type_param.py +101 -0
- tpyc/test_cycle_detection.py +221 -0
- tpyc/test_dump_types.py +225 -0
- tpyc/test_install_docs.py +65 -0
- tpyc/test_local_cpp_form.py +135 -0
- tpyc/test_macro_loader.py +76 -0
- tpyc/test_method_expansion.py +254 -0
- tpyc/test_nominal_identity.py +182 -0
- tpyc/test_overloads.py +410 -0
- tpyc/test_parse.py +303 -0
- tpyc/test_parse_type_ref.py +506 -0
- tpyc/test_parse_version_info.py +58 -0
- tpyc/test_reach_analysis.py +72 -0
- tpyc/test_ref_type.py +216 -0
- tpyc/test_send_sync_substitution.py +276 -0
- tpyc/test_tuple_mutation_propagation.py +206 -0
- tpyc/test_type_def_registry.py +1729 -0
- tpyc/test_union_types.py +195 -0
- tpyc/type_def_registry.py +975 -0
- tpyc/typesys.py +5104 -0
|
@@ -0,0 +1,2528 @@
|
|
|
1
|
+
/*************************************************
|
|
2
|
+
* Perl-Compatible Regular Expressions *
|
|
3
|
+
*************************************************/
|
|
4
|
+
|
|
5
|
+
/* PCRE is a library of functions to support regular expressions whose syntax
|
|
6
|
+
and semantics are as close as possible to those of the Perl 5 language.
|
|
7
|
+
|
|
8
|
+
Written by Philip Hazel
|
|
9
|
+
Original API code Copyright (c) 1997-2012 University of Cambridge
|
|
10
|
+
New API code Copyright (c) 2016 University of Cambridge
|
|
11
|
+
|
|
12
|
+
-----------------------------------------------------------------------------
|
|
13
|
+
Redistribution and use in source and binary forms, with or without
|
|
14
|
+
modification, are permitted provided that the following conditions are met:
|
|
15
|
+
|
|
16
|
+
* Redistributions of source code must retain the above copyright notice,
|
|
17
|
+
this list of conditions and the following disclaimer.
|
|
18
|
+
|
|
19
|
+
* Redistributions in binary form must reproduce the above copyright
|
|
20
|
+
notice, this list of conditions and the following disclaimer in the
|
|
21
|
+
documentation and/or other materials provided with the distribution.
|
|
22
|
+
|
|
23
|
+
* Neither the name of the University of Cambridge nor the names of its
|
|
24
|
+
contributors may be used to endorse or promote products derived from
|
|
25
|
+
this software without specific prior written permission.
|
|
26
|
+
|
|
27
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
28
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
29
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
30
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
31
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
32
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
33
|
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
34
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
35
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
36
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
37
|
+
POSSIBILITY OF SUCH DAMAGE.
|
|
38
|
+
-----------------------------------------------------------------------------
|
|
39
|
+
*/
|
|
40
|
+
|
|
41
|
+
#ifdef HAVE_CONFIG_H
|
|
42
|
+
#include "config.h"
|
|
43
|
+
#endif
|
|
44
|
+
|
|
45
|
+
#include <stdio.h>
|
|
46
|
+
#include <string.h>
|
|
47
|
+
|
|
48
|
+
#define PCRE2_CODE_UNIT_WIDTH 0
|
|
49
|
+
#include "pcre2.h"
|
|
50
|
+
|
|
51
|
+
/*
|
|
52
|
+
Letter characters:
|
|
53
|
+
\xe6\x92\xad = 0x64ad = 25773 (kanji)
|
|
54
|
+
Non-letter characters:
|
|
55
|
+
\xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
|
|
56
|
+
\xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
|
|
57
|
+
\xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
|
|
58
|
+
\xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
|
|
59
|
+
Newlines:
|
|
60
|
+
\xc2\x85 = 0x85 = 133 (NExt Line = NEL)
|
|
61
|
+
\xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
|
|
62
|
+
Othercase pairs:
|
|
63
|
+
\xc3\xa9 = 0xe9 = 233 (e')
|
|
64
|
+
\xc3\x89 = 0xc9 = 201 (E')
|
|
65
|
+
\xc3\xa1 = 0xe1 = 225 (a')
|
|
66
|
+
\xc3\x81 = 0xc1 = 193 (A')
|
|
67
|
+
\x53 = 0x53 = S
|
|
68
|
+
\x73 = 0x73 = s
|
|
69
|
+
\xc5\xbf = 0x17f = 383 (long S)
|
|
70
|
+
\xc8\xba = 0x23a = 570
|
|
71
|
+
\xe2\xb1\xa5 = 0x2c65 = 11365
|
|
72
|
+
\xe1\xbd\xb8 = 0x1f78 = 8056
|
|
73
|
+
\xe1\xbf\xb8 = 0x1ff8 = 8184
|
|
74
|
+
\xf0\x90\x90\x80 = 0x10400 = 66560
|
|
75
|
+
\xf0\x90\x90\xa8 = 0x10428 = 66600
|
|
76
|
+
\xc7\x84 = 0x1c4 = 452
|
|
77
|
+
\xc7\x85 = 0x1c5 = 453
|
|
78
|
+
\xc7\x86 = 0x1c6 = 454
|
|
79
|
+
Caseless sets:
|
|
80
|
+
ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
|
|
81
|
+
ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
|
|
82
|
+
ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
|
|
83
|
+
|
|
84
|
+
Mark property:
|
|
85
|
+
\xcc\x8d = 0x30d = 781
|
|
86
|
+
Special:
|
|
87
|
+
\xc2\x80 = 0x80 = 128 (lowest 2 byte character)
|
|
88
|
+
\xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
|
|
89
|
+
\xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
|
|
90
|
+
\xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
|
|
91
|
+
\xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
|
|
92
|
+
\xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
|
|
93
|
+
*/
|
|
94
|
+
|
|
95
|
+
static int regression_tests(void);
|
|
96
|
+
static int invalid_utf8_regression_tests(void);
|
|
97
|
+
static int invalid_utf16_regression_tests(void);
|
|
98
|
+
static int invalid_utf32_regression_tests(void);
|
|
99
|
+
|
|
100
|
+
/*
 * Entry point of the JIT regression test program.
 *
 * Queries the library, through whichever code unit width is compiled in,
 * for PCRE2_CONFIG_JIT and refuses to run (returning 1) when the reported
 * value is zero. Otherwise it calls the four *_regression_tests() functions
 * and returns the bitwise OR of their results, so the exit status is
 * non-zero as soon as any of them returns non-zero.
 */
int main(void)
{
	/* pcre2_config() stores a uint32_t for PCRE2_CONFIG_JIT. */
	uint32_t jit = 0;
	int result;

#if defined SUPPORT_PCRE2_8
	pcre2_config_8(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_16
	pcre2_config_16(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_32
	pcre2_config_32(PCRE2_CONFIG_JIT, &jit);
#endif
	if (!jit) {
		printf("JIT must be enabled to run pcre2_jit_test\n");
		return 1;
	}

	/* Use separate statements: the operands of a single
	   `a | b | c | d` expression may be evaluated in any order, which
	   would leave the order in which the suites run (and print their
	   output) unspecified. Every suite is still always executed. */
	result = regression_tests();
	result |= invalid_utf8_regression_tests();
	result |= invalid_utf16_regression_tests();
	result |= invalid_utf32_regression_tests();
	return result;
}
|
|
119
|
+
|
|
120
|
+
/* --------------------------------------------------------------------------------------- */
|
|
121
|
+
|
|
122
|
+
#if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32)
|
|
123
|
+
#error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
|
|
124
|
+
#endif
|
|
125
|
+
|
|
126
|
+
/* Short aliases for the compile-option combinations used in the test table.
   Letters: C = PCRE2_CASELESS, M = PCRE2_MULTILINE, U = PCRE2_UTF,
   P = PCRE2_UCP. */
#define MU	(PCRE2_MULTILINE | PCRE2_UTF)
#define MUP	(PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
#define CMU	(PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF)
#define CMUP	(PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
#define M	(PCRE2_MULTILINE)
#define MP	(PCRE2_MULTILINE | PCRE2_UCP)
#define U	(PCRE2_UTF)
#define CM	(PCRE2_CASELESS | PCRE2_MULTILINE)

/* BSR(x) moves a value into the bits above the low 16; A is shorthand for
   the ANYCRLF newline convention. */
#define BSR(x)	((x) << 16)
#define A	PCRE2_NEWLINE_ANYCRLF

/* Inverse helpers: split a combined value back into its low 16 bits
   (newline part) and the bits above them (the part BSR() produced). */
#define GET_NEWLINE(x)	((x) & 0xffff)
#define GET_BSR(x)	((x) >> 16)

/* The low 16 bits selected by OFFSET_MASK are kept apart from the F_* flag
   bits, which all lie above them.
   NOTE(review): F_NO32 has the same value as F_NO16, so the two flags are
   indistinguishable -- confirm this is intended before relying on either
   one individually. */
#define OFFSET_MASK	0x00ffff
#define F_NO8		0x010000
#define F_NO16		0x020000
#define F_NO32		0x020000
#define F_NOMATCH	0x040000
#define F_DIFF		0x080000
#define F_FORCECONV	0x100000
#define F_PROPERTY	0x200000
|
|
149
|
+
|
|
150
|
+
/* One row of the regression table. Rows are written as positional
   initializers, so the order and types of the members must not change. */
struct regression_test_case {
	/* Compile option bits, e.g. one of the MU / CMU / M combinations. */
	uint32_t compile_options;
	/* Newline convention, e.g. PCRE2_NEWLINE_ANYCRLF.
	   NOTE(review): presumably may also carry a BSR(x) value in the bits
	   above the low 16 -- confirm against the code that reads it. */
	int newline;
	/* Match-time option bits. */
	int match_options;
	/* Start offset, optionally ORed with F_* flags
	   (the table contains entries such as `1 | F_NOMATCH`). */
	int start_offset;
	/* Pattern text. */
	const char *pattern;
	/* Subject text. */
	const char *input;
};
|
|
158
|
+
|
|
159
|
+
static struct regression_test_case regression_test_cases[] = {
|
|
160
|
+
/* Constant strings. */
|
|
161
|
+
{ MU, A, 0, 0, "AbC", "AbAbC" },
|
|
162
|
+
{ MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
|
|
163
|
+
{ CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
|
|
164
|
+
{ M, A, 0, 0, "[^a]", "aAbB" },
|
|
165
|
+
{ CM, A, 0, 0, "[^m]", "mMnN" },
|
|
166
|
+
{ M, A, 0, 0, "a[^b][^#]", "abacd" },
|
|
167
|
+
{ CM, A, 0, 0, "A[^B][^E]", "abacd" },
|
|
168
|
+
{ CMU, A, 0, 0, "[^x][^#]", "XxBll" },
|
|
169
|
+
{ MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" },
|
|
170
|
+
{ CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" },
|
|
171
|
+
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" },
|
|
172
|
+
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" },
|
|
173
|
+
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" },
|
|
174
|
+
{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" },
|
|
175
|
+
{ MU, A, 0, 0, "[axd]", "sAXd" },
|
|
176
|
+
{ CMU, A, 0, 0, "[axd]", "sAXd" },
|
|
177
|
+
{ CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" },
|
|
178
|
+
{ MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
|
|
179
|
+
{ MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
|
|
180
|
+
{ CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
|
|
181
|
+
{ MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
|
|
182
|
+
{ MU, A, 0, 0, "[^a]", "\xc2\x80[]" },
|
|
183
|
+
{ CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
|
|
184
|
+
{ CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
|
|
185
|
+
{ PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
|
|
186
|
+
{ PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
|
|
187
|
+
{ PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" },
|
|
188
|
+
#ifndef NEVER_BACKSLASH_C
|
|
189
|
+
{ M, A, 0, 0, "\\Ca", "cda" },
|
|
190
|
+
{ CM, A, 0, 0, "\\Ca", "CDA" },
|
|
191
|
+
{ M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
|
|
192
|
+
{ CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
|
|
193
|
+
#endif /* !NEVER_BACKSLASH_C */
|
|
194
|
+
{ CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
|
195
|
+
{ CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
|
196
|
+
{ CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
|
|
197
|
+
{ CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
|
|
198
|
+
{ M, A, 0, 0, "[3-57-9]", "5" },
|
|
199
|
+
{ PCRE2_AUTO_CALLOUT, A, 0, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
|
|
200
|
+
"12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
|
|
201
|
+
{ 0, A, 0, 0, "..a.......b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
|
|
202
|
+
{ 0, A, 0, 0, "..a.....b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
|
|
203
|
+
|
|
204
|
+
/* Assertions. */
|
|
205
|
+
{ MU, A, 0, 0, "\\b[^A]", "A_B#" },
|
|
206
|
+
{ M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" },
|
|
207
|
+
{ MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" },
|
|
208
|
+
{ MP, A, 0, 0, "\\B", "_\xa1" },
|
|
209
|
+
{ MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," },
|
|
210
|
+
{ MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" },
|
|
211
|
+
{ MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
|
|
212
|
+
{ MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
|
|
213
|
+
{ MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
|
|
214
|
+
{ MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
|
|
215
|
+
{ CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" },
|
|
216
|
+
{ M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" },
|
|
217
|
+
{ M, A, 0, 1 | F_NOMATCH, "^", "\n" },
|
|
218
|
+
{ 0, 0, 0, 0, "^ab", "ab" },
|
|
219
|
+
{ 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" },
|
|
220
|
+
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" },
|
|
221
|
+
{ MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
|
|
222
|
+
{ M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" },
|
|
223
|
+
{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" },
|
|
224
|
+
{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" },
|
|
225
|
+
{ 0, 0, 0, 0, "ab$", "ab" },
|
|
226
|
+
{ 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
|
|
227
|
+
{ PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" },
|
|
228
|
+
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" },
|
|
229
|
+
{ M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" },
|
|
230
|
+
{ MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" },
|
|
231
|
+
{ MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" },
|
|
232
|
+
{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" },
|
|
233
|
+
{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" },
|
|
234
|
+
{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" },
|
|
235
|
+
{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" },
|
|
236
|
+
{ U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
|
|
237
|
+
{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" },
|
|
238
|
+
{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" },
|
|
239
|
+
{ U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" },
|
|
240
|
+
{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" },
|
|
241
|
+
{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" },
|
|
242
|
+
{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" },
|
|
243
|
+
{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" },
|
|
244
|
+
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
|
|
245
|
+
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
|
|
246
|
+
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
|
|
247
|
+
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
|
|
248
|
+
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
|
|
249
|
+
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
|
|
250
|
+
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
|
|
251
|
+
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
|
|
252
|
+
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
|
|
253
|
+
{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" },
|
|
254
|
+
{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
|
|
255
|
+
{ M, A, 0, 0, "\\Aa", "aaa" },
|
|
256
|
+
{ M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" },
|
|
257
|
+
{ M, A, 0, 1, "\\Ga", "aaa" },
|
|
258
|
+
{ M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" },
|
|
259
|
+
{ M, A, 0, 0, "a\\z", "aaa" },
|
|
260
|
+
{ M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
|
|
261
|
+
|
|
262
|
+
/* Brackets and alternatives. */
|
|
263
|
+
{ MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
|
|
264
|
+
{ MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
|
|
265
|
+
{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
|
|
266
|
+
{ CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
|
|
267
|
+
{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
|
|
268
|
+
{ MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
|
|
269
|
+
{ MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
|
|
270
|
+
{ MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
|
|
271
|
+
{ MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
|
|
272
|
+
{ MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
|
|
273
|
+
{ U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
|
|
274
|
+
{ U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
|
|
275
|
+
{ CM, A, 0, 0, "ab|cd", "CD" },
|
|
276
|
+
{ CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
|
|
277
|
+
{ CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
|
|
278
|
+
{ 0, A, 0, 0, "(a|)b*+a", "a" },
|
|
279
|
+
{ 0, A, 0, 0 | F_NOMATCH, "(.|.|.|.|.)(|.|.|.|.)(.||.|.|.)(.|.||.|.)(.|.|.||.)(.|.|.|.|)(A|.|.|.|.)(.|A|.|.|.)(.|.|A|.|.)(.|.|.|A|.)(.|.|.|.|A)(B|.|.|.|.)(.|B|.|.|.)(.|.|B|.|.)(.|.|.|B|.)(.|.|.|.|B)xa", "1234567890123456ax" },
|
|
280
|
+
|
|
281
|
+
/* Greedy and non-greedy ? operators. */
|
|
282
|
+
{ MU, A, 0, 0, "(?:a)?a", "laab" },
|
|
283
|
+
{ CMU, A, 0, 0, "(A)?A", "llaab" },
|
|
284
|
+
{ MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
|
|
285
|
+
{ MU, A, 0, 0, "(a)?a", "manm" },
|
|
286
|
+
{ CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
|
|
287
|
+
{ MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
|
|
288
|
+
{ MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
|
|
289
|
+
|
|
290
|
+
/* Greedy and non-greedy + operators */
|
|
291
|
+
{ MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
|
|
292
|
+
{ MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" },
|
|
293
|
+
{ MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" },
|
|
294
|
+
{ MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" },
|
|
295
|
+
{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
|
|
296
|
+
{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
|
|
297
|
+
{ MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
|
|
298
|
+
{ MU, A, 0, 0, "(aa|bb){8,1000}", "abaabbaabbaabbaab_aabbaabbaabbaabbaabbaabb_" },
|
|
299
|
+
|
|
300
|
+
/* Greedy and non-greedy * operators */
|
|
301
|
+
{ CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
|
|
302
|
+
{ MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
|
|
303
|
+
{ MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" },
|
|
304
|
+
{ CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" },
|
|
305
|
+
{ MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
|
|
306
|
+
{ MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
|
|
307
|
+
{ M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
|
|
308
|
+
{ M, A, 0, 0, "((?:a|)*){0}a", "a" },
|
|
309
|
+
|
|
310
|
+
/* Combining ? + * operators */
|
|
311
|
+
{ MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
|
|
312
|
+
{ MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
|
|
313
|
+
{ MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
|
|
314
|
+
{ MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
|
|
315
|
+
{ MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
|
|
316
|
+
|
|
317
|
+
/* Single character iterators. */
|
|
318
|
+
{ MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
|
|
319
|
+
{ MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
|
|
320
|
+
{ MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
|
|
321
|
+
{ MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
|
|
322
|
+
{ MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
|
|
323
|
+
{ MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
|
|
324
|
+
{ MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
|
|
325
|
+
{ MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
|
|
326
|
+
{ MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" },
|
|
327
|
+
{ MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
|
|
328
|
+
{ MU, A, 0, 0, "(a?+[^b])+", "babaacacb" },
|
|
329
|
+
{ MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
|
|
330
|
+
{ CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
|
|
331
|
+
{ CMU, A, 0, 0, "[c-f]+k", "DemmFke" },
|
|
332
|
+
{ MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
|
|
333
|
+
{ MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
|
|
334
|
+
{ CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
|
|
335
|
+
{ CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
|
|
336
|
+
{ CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" },
|
|
337
|
+
{ CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
|
|
338
|
+
{ MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
|
|
339
|
+
{ CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
|
|
340
|
+
{ MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
|
|
341
|
+
{ MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
|
|
342
|
+
{ MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" },
|
|
343
|
+
{ MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
|
|
344
|
+
{ CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
|
|
345
|
+
{ CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
|
|
346
|
+
{ CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
|
|
347
|
+
{ CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
|
|
348
|
+
{ MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
|
|
349
|
+
{ MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
|
|
350
|
+
{ MU, A, 0, 0, "\\d+123", "987654321,01234" },
|
|
351
|
+
{ MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
|
|
352
|
+
{ MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
|
|
353
|
+
{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
|
|
354
|
+
{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
|
|
355
|
+
{ MU, A, 0, 0, ".[ab]*.", "xx" },
|
|
356
|
+
{ MU, A, 0, 0, ".[ab]*a", "xxa" },
|
|
357
|
+
{ MU, A, 0, 0, ".[ab]?.", "xx" },
|
|
358
|
+
{ MU, A, 0, 0, "_[ab]+_*a", "_aa" },
|
|
359
|
+
{ MU, A, 0, 0, "#(A+)#\\d+", "#A#A#0" },
|
|
360
|
+
{ MU, A, 0, 0, "(?P<size>\\d+)m|M", "4M" },
|
|
361
|
+
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\n?.+#", "\n,\n,#" },
|
|
362
|
+
{ 0, A, 0, 0, "<(\\w+)[\\s\\w]+id>", "<br><div id>" },
|
|
363
|
+
|
|
364
|
+
/* Bracket repeats with limit. */
|
|
365
|
+
{ MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
|
|
366
|
+
{ MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
|
|
367
|
+
{ MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
|
|
368
|
+
{ MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
|
|
369
|
+
{ MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
|
|
370
|
+
{ MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
|
|
371
|
+
{ MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
|
|
372
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
|
|
373
|
+
{ MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" },
|
|
374
|
+
|
|
375
|
+
/* Basic character sets. */
|
|
376
|
+
{ MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
|
|
377
|
+
{ MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
|
|
378
|
+
{ MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
|
|
379
|
+
{ MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
|
|
380
|
+
{ MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
|
|
381
|
+
{ MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
|
|
382
|
+
{ MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" },
|
|
383
|
+
{ MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
|
|
384
|
+
{ MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" },
|
|
385
|
+
{ MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" },
|
|
386
|
+
{ MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
|
|
387
|
+
{ MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
|
|
388
|
+
{ CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
|
|
389
|
+
{ CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
|
|
390
|
+
{ MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
|
|
391
|
+
{ MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
|
|
392
|
+
{ MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
|
|
393
|
+
{ MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
|
|
394
|
+
{ MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
|
|
395
|
+
{ MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
|
|
396
|
+
{ MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
|
|
397
|
+
{ MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
|
|
398
|
+
{ CMU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "^[\\x{100}-\\x{17f}]", " " },
|
|
399
|
+
{ M, A, 0, 0 | F_NOMATCH, "[^\\S\\W]{6}", "abcdefghijk" },
|
|
400
|
+
|
|
401
|
+
/* Unicode properties. */
|
|
402
|
+
{ MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
|
|
403
|
+
{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
|
|
404
|
+
{ MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
|
|
405
|
+
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
|
|
406
|
+
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
|
|
407
|
+
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
|
|
408
|
+
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
|
|
409
|
+
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
|
|
410
|
+
{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
|
|
411
|
+
{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
|
|
412
|
+
{ MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
|
|
413
|
+
{ MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
|
|
414
|
+
{ CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
|
|
415
|
+
{ MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
|
|
416
|
+
{ MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
|
|
417
|
+
{ MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
|
|
418
|
+
{ CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
|
|
419
|
+
{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
|
|
420
|
+
{ MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
|
|
421
|
+
{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB baaa" },
|
|
422
|
+
{ MUP, 0, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Hangul}\\p{Z}]", " " },
|
|
423
|
+
{ MUP, 0, 0, 0, "[\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
|
|
424
|
+
{ MUP, 0, 0, 0, "[\\x{a92e}\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
|
|
425
|
+
{ CMUP, 0, 0, 0, "[^S]\\B", "\xe2\x80\x8a" },
|
|
426
|
+
{ MUP, 0, 0, 0 | F_NOMATCH, "[^[:print:]\\x{f6f6}]", "\xef\x9b\xb6" },
|
|
427
|
+
{ MUP, 0, 0, 0, "[[:xdigit:]\\x{6500}]#", "\xe6\x94\x80#" },
|
|
428
|
+
{ MUP, 0, 0, 0 | F_PROPERTY, "[\\pC\\PC]#", "A#" },
|
|
429
|
+
|
|
430
|
+
/* Possible empty brackets. */
|
|
431
|
+
{ MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
|
|
432
|
+
{ MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" },
|
|
433
|
+
{ MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
|
|
434
|
+
{ MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" },
|
|
435
|
+
{ MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
|
|
436
|
+
{ MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
|
|
437
|
+
{ MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
|
|
438
|
+
{ MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
|
|
439
|
+
{ MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
|
|
440
|
+
{ MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
|
|
441
|
+
|
|
442
|
+
/* Start offset. */
|
|
443
|
+
{ MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
|
|
444
|
+
{ MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
|
|
445
|
+
{ MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
|
|
446
|
+
{ MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" },
|
|
447
|
+
|
|
448
|
+
/* Newline. */
|
|
449
|
+
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
|
|
450
|
+
{ M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
|
|
451
|
+
{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." },
|
|
452
|
+
{ MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" },
|
|
453
|
+
{ MU, A, 0, 1, "^", "\r\n" },
|
|
454
|
+
{ M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" },
|
|
455
|
+
{ M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" },
|
|
456
|
+
|
|
457
|
+
/* Any character except newline or any newline. */
|
|
458
|
+
{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
|
|
459
|
+
{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
|
|
460
|
+
{ 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
|
|
461
|
+
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
|
|
462
|
+
{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
|
|
463
|
+
{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
|
|
464
|
+
{ 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
|
|
465
|
+
{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
|
|
466
|
+
{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" },
|
|
467
|
+
{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" },
|
|
468
|
+
{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" },
|
|
469
|
+
{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" },
|
|
470
|
+
{ U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
|
|
471
|
+
{ MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" },
|
|
472
|
+
{ MU, A, 0, 0, "\\R+", "ab\r\n\r" },
|
|
473
|
+
{ MU, A, 0, 0, "\\R*", "ab\r\n\r" },
|
|
474
|
+
{ MU, A, 0, 0, "\\R*", "\r\n\r" },
|
|
475
|
+
{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" },
|
|
476
|
+
{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
|
|
477
|
+
{ MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
|
|
478
|
+
{ MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
|
|
479
|
+
{ MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
|
|
480
|
+
{ MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" },
|
|
481
|
+
{ MU, A, 0, 0, "\\R*\\R\\R", "\n\r" },
|
|
482
|
+
{ MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
|
|
483
|
+
{ MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
|
|
484
|
+
|
|
485
|
+
/* Atomic groups (no fallback from "next" direction). */
|
|
486
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
|
|
487
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
|
|
488
|
+
{ MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
|
|
489
|
+
"bababcdedefgheijijklmlmnop" },
|
|
490
|
+
{ MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
|
|
491
|
+
{ MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
|
|
492
|
+
{ MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
|
|
493
|
+
{ MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
|
|
494
|
+
{ MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" },
|
|
495
|
+
{ MU, A, 0, 0, "(?>x|)*$", "aaa" },
|
|
496
|
+
{ MU, A, 0, 0, "(?>(x)|)*$", "aaa" },
|
|
497
|
+
{ MU, A, 0, 0, "(?>x|())*$", "aaa" },
|
|
498
|
+
{ MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
|
|
499
|
+
{ MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
|
|
500
|
+
{ MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
|
|
501
|
+
{ MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
|
|
502
|
+
{ MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
|
|
503
|
+
{ MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
|
|
504
|
+
{ MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
|
|
505
|
+
{ MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
|
|
506
|
+
{ MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
|
|
507
|
+
{ MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
|
|
508
|
+
{ MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
|
|
509
|
+
{ MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
|
|
510
|
+
{ MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
|
|
511
|
+
{ MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
|
|
512
|
+
{ CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
|
|
513
|
+
{ MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
|
|
514
|
+
{ MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
|
|
515
|
+
{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
|
|
516
|
+
{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
|
|
517
|
+
{ MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
|
|
518
|
+
{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
|
|
519
|
+
{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
|
|
520
|
+
{ MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
|
|
521
|
+
{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
|
|
522
|
+
{ MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" },
|
|
523
|
+
{ MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
|
|
524
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?>a*|)a", "aaa" },
|
|
525
|
+
|
|
526
|
+
/* Possessive quantifiers. */
|
|
527
|
+
{ MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" },
|
|
528
|
+
{ MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
|
|
529
|
+
{ MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
|
|
530
|
+
{ MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" },
|
|
531
|
+
{ MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" },
|
|
532
|
+
{ MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" },
|
|
533
|
+
{ MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" },
|
|
534
|
+
{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" },
|
|
535
|
+
{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
|
|
536
|
+
{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" },
|
|
537
|
+
{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
|
|
538
|
+
{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" },
|
|
539
|
+
{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" },
|
|
540
|
+
{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" },
|
|
541
|
+
{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" },
|
|
542
|
+
{ MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
|
|
543
|
+
{ MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
|
|
544
|
+
{ MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
|
|
545
|
+
{ MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" },
|
|
546
|
+
{ MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" },
|
|
547
|
+
{ MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
|
|
548
|
+
{ MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
|
|
549
|
+
{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" },
|
|
550
|
+
{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
|
|
551
|
+
{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
|
|
552
|
+
{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
|
|
553
|
+
{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" },
|
|
554
|
+
{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" },
|
|
555
|
+
{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" },
|
|
556
|
+
{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" },
|
|
557
|
+
{ MU, A, 0, 0, "(A)*+$", "ABC" },
|
|
558
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
|
|
559
|
+
{ MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
|
|
560
|
+
{ MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
|
|
561
|
+
{ MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
|
|
562
|
+
{ MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
|
|
563
|
+
|
|
564
|
+
/* Back references. */
|
|
565
|
+
{ MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
|
|
566
|
+
{ CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
|
|
567
|
+
{ CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" },
|
|
568
|
+
{ MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
|
|
569
|
+
{ MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
|
|
570
|
+
{ MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
|
|
571
|
+
{ MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
|
|
572
|
+
{ MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
|
|
573
|
+
{ MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" },
|
|
574
|
+
{ CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
|
|
575
|
+
{ MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
|
|
576
|
+
{ CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
|
|
577
|
+
{ MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
|
|
578
|
+
{ CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
|
|
579
|
+
{ MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
|
|
580
|
+
{ MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
|
|
581
|
+
{ M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
|
|
582
|
+
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
|
|
583
|
+
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
|
|
584
|
+
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
|
|
585
|
+
{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
|
|
586
|
+
{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
|
|
587
|
+
{ CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
|
|
588
|
+
{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
|
|
589
|
+
{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
|
|
590
|
+
{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
|
|
591
|
+
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
|
|
592
|
+
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
|
|
593
|
+
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
|
|
594
|
+
{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
|
|
595
|
+
{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
|
|
596
|
+
{ MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
|
|
597
|
+
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
|
598
|
+
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
|
599
|
+
{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
|
|
600
|
+
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
|
|
601
|
+
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
|
|
602
|
+
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
|
603
|
+
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
|
|
604
|
+
{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" },
|
|
605
|
+
{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\1+?()", "" },
|
|
606
|
+
|
|
607
|
+
/* Assertions. */
|
|
608
|
+
{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
|
|
609
|
+
{ MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
|
|
610
|
+
{ MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
|
|
611
|
+
{ MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
|
|
612
|
+
{ MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
|
|
613
|
+
{ M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
|
|
614
|
+
{ M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
|
|
615
|
+
{ MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
|
|
616
|
+
{ MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
|
|
617
|
+
{ MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
|
|
618
|
+
{ MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
|
|
619
|
+
{ MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" },
|
|
620
|
+
{ MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" },
|
|
621
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
|
|
622
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
|
|
623
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
|
|
624
|
+
{ MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
|
|
625
|
+
{ MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
|
|
626
|
+
{ MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
|
|
627
|
+
{ MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
|
|
628
|
+
{ MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
|
|
629
|
+
{ MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
|
|
630
|
+
{ MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
|
|
631
|
+
{ MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
|
|
632
|
+
{ MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" },
|
|
633
|
+
{ MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" },
|
|
634
|
+
{ MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" },
|
|
635
|
+
{ MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" },
|
|
636
|
+
{ MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" },
|
|
637
|
+
{ MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" },
|
|
638
|
+
{ MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
|
|
639
|
+
{ MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
|
|
640
|
+
{ MU, A, 0, 0, "a(?=)b", "ab" },
|
|
641
|
+
{ MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" },
|
|
642
|
+
{ MU, A, 0, 0, "(?(?<!|(|a)))", "a" },
|
|
643
|
+
|
|
644
|
+
/* Not empty, ACCEPT, FAIL */
|
|
645
|
+
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
|
|
646
|
+
{ MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" },
|
|
647
|
+
{ MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" },
|
|
648
|
+
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
|
|
649
|
+
{ MU, A, 0, 0, "a(*ACCEPT)b", "ab" },
|
|
650
|
+
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
|
|
651
|
+
{ MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
|
|
652
|
+
{ MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
|
|
653
|
+
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
|
|
654
|
+
{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
|
|
655
|
+
{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
|
|
656
|
+
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
|
|
657
|
+
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
|
|
658
|
+
{ MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" },
|
|
659
|
+
{ MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" },
|
|
660
|
+
{ MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" },
|
|
661
|
+
{ MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
|
|
662
|
+
{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
|
|
663
|
+
{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
|
|
664
|
+
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
|
|
665
|
+
{ MU | PCRE2_ENDANCHORED, A, 0, 0, "aa(*ACCEPT)aa", "aaa" },
|
|
666
|
+
|
|
667
|
+
/* Conditional blocks. */
|
|
668
|
+
{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
|
|
669
|
+
{ MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
|
|
670
|
+
{ MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
|
|
671
|
+
{ MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
|
|
672
|
+
{ MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
|
|
673
|
+
{ MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
|
|
674
|
+
{ MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
|
|
675
|
+
{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
|
|
676
|
+
{ MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
|
|
677
|
+
{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
|
|
678
|
+
{ MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
|
|
679
|
+
{ MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
|
|
680
|
+
{ MU, A, 0, 0, "(?(?=a)ab)", "a" },
|
|
681
|
+
{ MU, A, 0, 0, "(?(?<!b)c)", "b" },
|
|
682
|
+
{ MU, A, 0, 0, "(?(DEFINE)a(b))", "a" },
|
|
683
|
+
{ MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
|
|
684
|
+
{ MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
|
|
685
|
+
{ MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
|
|
686
|
+
{ MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
|
|
687
|
+
{ MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
|
|
688
|
+
{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
|
|
689
|
+
{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" },
|
|
690
|
+
{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
|
|
691
|
+
{ MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
|
|
692
|
+
{ MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
|
|
693
|
+
{ MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
|
|
694
|
+
{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
|
|
695
|
+
{ MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
|
|
696
|
+
{ MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
|
|
697
|
+
{ MU, A, 0, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
|
|
698
|
+
{ MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
|
|
699
|
+
{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
|
|
700
|
+
{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
|
|
701
|
+
{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
|
|
702
|
+
{ MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
|
|
703
|
+
{ MU, A, 0, 0, "(?(?!)a|b)", "ab" },
|
|
704
|
+
{ MU, A, 0, 0, "(?(?!)a)", "ab" },
|
|
705
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
|
|
706
|
+
|
|
707
|
+
/* Set start of match. */
|
|
708
|
+
{ MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
|
|
709
|
+
{ MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
|
|
710
|
+
{ MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
|
|
711
|
+
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
|
|
712
|
+
{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
|
|
713
|
+
|
|
714
|
+
/* First line. */
|
|
715
|
+
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
|
|
716
|
+
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
|
|
717
|
+
{ MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" },
|
|
718
|
+
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" },
|
|
719
|
+
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" },
|
|
720
|
+
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" },
|
|
721
|
+
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" },
|
|
722
|
+
{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
|
|
723
|
+
{ MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
|
|
724
|
+
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" },
|
|
725
|
+
{ M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" },
|
|
726
|
+
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
|
|
727
|
+
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
|
|
728
|
+
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
|
|
729
|
+
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" },
|
|
730
|
+
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
|
|
731
|
+
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
|
|
732
|
+
{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
|
|
733
|
+
{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
|
|
734
|
+
{ MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
|
|
735
|
+
{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
|
|
736
|
+
{ MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" },
|
|
737
|
+
|
|
738
|
+
/* Recurse. */
|
|
739
|
+
{ MU, A, 0, 0, "(a)(?1)", "aa" },
|
|
740
|
+
{ MU, A, 0, 0, "((a))(?1)", "aa" },
|
|
741
|
+
{ MU, A, 0, 0, "(b|a)(?1)", "aa" },
|
|
742
|
+
{ MU, A, 0, 0, "(b|(a))(?1)", "aa" },
|
|
743
|
+
{ MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
|
|
744
|
+
{ MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" },
|
|
745
|
+
{ MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
|
|
746
|
+
{ MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" },
|
|
747
|
+
{ MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
|
|
748
|
+
{ MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
|
|
749
|
+
{ MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
|
|
750
|
+
{ MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
|
|
751
|
+
{ MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
|
|
752
|
+
{ MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
|
|
753
|
+
{ MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
|
|
754
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
|
|
755
|
+
{ MU, A, 0, 0, "b|<(?R)*>", "<<b>" },
|
|
756
|
+
{ MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
|
|
757
|
+
{ MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
|
|
758
|
+
{ MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
|
|
759
|
+
{ MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
|
|
760
|
+
{ MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
|
|
761
|
+
{ MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
|
|
762
|
+
{ MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
|
|
763
|
+
{ MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
|
|
764
|
+
{ MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
|
|
765
|
+
{ MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
|
|
766
|
+
{ MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
|
|
767
|
+
{ MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
|
|
768
|
+
{ MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
|
|
769
|
+
{ MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
|
|
770
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?1)$((.|\\2xx){1,2})", "abc" },
|
|
771
|
+
|
|
772
|
+
/* 16 bit specific tests. */
|
|
773
|
+
{ CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
|
|
774
|
+
{ CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
|
|
775
|
+
{ CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
|
|
776
|
+
{ CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
|
|
777
|
+
{ CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
|
|
778
|
+
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
|
|
779
|
+
{ CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
|
|
780
|
+
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
|
|
781
|
+
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
|
|
782
|
+
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
|
|
783
|
+
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
|
|
784
|
+
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
|
|
785
|
+
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
|
|
786
|
+
{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
|
|
787
|
+
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
|
|
788
|
+
{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
|
|
789
|
+
{ M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
|
|
790
|
+
{ M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
|
|
791
|
+
{ CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
|
|
792
|
+
{ CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
|
|
793
|
+
{ CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
|
|
794
|
+
{ CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
|
|
795
|
+
{ CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
|
|
796
|
+
{ CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
|
|
797
|
+
{ CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
|
|
798
|
+
{ M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
|
|
799
|
+
{ 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
|
|
800
|
+
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
|
|
801
|
+
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
|
|
802
|
+
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
|
|
803
|
+
{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
|
|
804
|
+
|
|
805
|
+
/* Partial matching. */
|
|
806
|
+
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" },
|
|
807
|
+
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" },
|
|
808
|
+
{ MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" },
|
|
809
|
+
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" },
|
|
810
|
+
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
|
|
811
|
+
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
|
|
812
|
+
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" },
|
|
813
|
+
{ MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" },
|
|
814
|
+
|
|
815
|
+
/* (*MARK) verb. */
|
|
816
|
+
{ MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" },
|
|
817
|
+
{ MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
|
|
818
|
+
{ MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
|
|
819
|
+
{ MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
|
|
820
|
+
{ MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" },
|
|
821
|
+
{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
|
|
822
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
|
|
823
|
+
{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
|
|
824
|
+
{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
|
|
825
|
+
{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
|
|
826
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
|
|
827
|
+
{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
|
|
828
|
+
{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
|
|
829
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
|
|
830
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" },
|
|
831
|
+
|
|
832
|
+
/* (*COMMIT) verb. */
|
|
833
|
+
{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
|
|
834
|
+
{ MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" },
|
|
835
|
+
{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
|
|
836
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
|
|
837
|
+
{ MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
|
|
838
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
|
|
839
|
+
|
|
840
|
+
/* (*PRUNE) verb. */
|
|
841
|
+
{ MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" },
|
|
842
|
+
{ MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" },
|
|
843
|
+
{ MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
|
|
844
|
+
{ MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
|
|
845
|
+
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
|
|
846
|
+
{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
|
|
847
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
|
|
848
|
+
{ MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
|
|
849
|
+
{ MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
|
850
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
|
851
|
+
{ MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
|
852
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
|
|
853
|
+
{ MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
|
|
854
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
|
|
855
|
+
{ MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
|
856
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
|
857
|
+
{ MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
|
858
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
|
|
859
|
+
{ MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
|
|
860
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
|
|
861
|
+
{ MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
|
|
862
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
|
|
863
|
+
{ MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
|
|
864
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
|
|
865
|
+
{ MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
|
|
866
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
|
|
867
|
+
{ MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
|
|
868
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
|
|
869
|
+
{ MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
|
|
870
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
|
|
871
|
+
|
|
872
|
+
/* (*SKIP) verb. */
|
|
873
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
|
|
874
|
+
{ MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
|
|
875
|
+
{ MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
|
|
876
|
+
{ MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
|
|
877
|
+
|
|
878
|
+
/* (*THEN) verb. */
|
|
879
|
+
{ MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
|
|
880
|
+
{ MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
|
|
881
|
+
{ MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
|
|
882
|
+
{ MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
|
|
883
|
+
{ MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
|
|
884
|
+
{ MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
|
|
885
|
+
{ MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
|
|
886
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
|
|
887
|
+
{ MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
|
|
888
|
+
{ MU, A, 0, 0, "(?=(*THEN: ))* ", " " },
|
|
889
|
+
{ MU, A, 0, 0, "a(*THEN)(?R) |", "a" },
|
|
890
|
+
{ MU, A, 0, 0 | F_NOMATCH, "(?<!(*THEN)a|(*THEN)b|(*THEN)ab?|(*THEN)ba?|)", "c" },
|
|
891
|
+
|
|
892
|
+
/* Recurse and control verbs. */
|
|
893
|
+
{ MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
|
|
894
|
+
{ MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
|
|
895
|
+
{ MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
|
|
896
|
+
{ MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
|
|
897
|
+
{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
|
|
898
|
+
{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
|
|
899
|
+
{ MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
|
|
900
|
+
{ MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
|
|
901
|
+
|
|
902
|
+
#ifdef SUPPORT_UNICODE
|
|
903
|
+
/* Script runs and iterations. */
|
|
904
|
+
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
|
905
|
+
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
|
906
|
+
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
|
907
|
+
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
|
908
|
+
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
|
909
|
+
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)++#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
|
|
910
|
+
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)?#", "!ab!abc!ab!ab#" },
|
|
911
|
+
{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)??#", "!ab!abc!ab!ab#" },
|
|
912
|
+
#endif /* SUPPORT_UNICODE */
|
|
913
|
+
|
|
914
|
+
/* Deep recursion. */
|
|
915
|
+
{ MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
|
|
916
|
+
{ MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
|
|
917
|
+
{ MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
|
|
918
|
+
|
|
919
|
+
/* Deep recursion: Stack limit reached. */
|
|
920
|
+
{ M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
|
|
921
|
+
{ M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
|
922
|
+
{ M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
|
923
|
+
{ M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
|
924
|
+
{ M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
|
|
925
|
+
|
|
926
|
+
{ 0, 0, 0, 0, NULL, NULL }
|
|
927
|
+
};
|
|
928
|
+
|
|
929
|
+
#ifdef SUPPORT_PCRE2_8
|
|
930
|
+
/*
 * JIT stack callback for the 8-bit library: the opaque argument is the
 * stack itself, so it is simply handed back with its real type.
 * setstack8() passes this function together with getstack8() to
 * pcre2_jit_stack_assign_8().
 */
static pcre2_jit_stack_8 *callback8(void *arg)
{
	pcre2_jit_stack_8 *jit_stack = arg;

	return jit_stack;
}
|
|
934
|
+
#endif
|
|
935
|
+
|
|
936
|
+
#ifdef SUPPORT_PCRE2_16
|
|
937
|
+
/*
 * JIT stack callback for the 16-bit library: the opaque argument is the
 * stack itself, so it is simply handed back with its real type.
 * setstack16() passes this function together with getstack16() to
 * pcre2_jit_stack_assign_16().
 */
static pcre2_jit_stack_16 *callback16(void *arg)
{
	pcre2_jit_stack_16 *jit_stack = arg;

	return jit_stack;
}
|
|
941
|
+
#endif
|
|
942
|
+
|
|
943
|
+
#ifdef SUPPORT_PCRE2_32
|
|
944
|
+
/*
 * JIT stack callback for the 32-bit library: the opaque argument is the
 * stack itself, so it is simply handed back with its real type.
 * setstack32() passes this function together with getstack32() to
 * pcre2_jit_stack_assign_32().
 */
static pcre2_jit_stack_32 *callback32(void *arg)
{
	pcre2_jit_stack_32 *jit_stack = arg;

	return jit_stack;
}
|
|
948
|
+
#endif
|
|
949
|
+
|
|
950
|
+
#ifdef SUPPORT_PCRE2_8
|
|
951
|
+
/*
 * Cached 8-bit JIT stack. NULL until getstack8() creates it, and NULL again
 * after setstack8(NULL) releases it.
 */
static pcre2_jit_stack_8 *stack8;

/*
 * Returns the cached 8-bit JIT stack, creating it on first use.
 *
 * The stack is requested with the arguments (1, 1024 * 1024, NULL); per the
 * PCRE2 API these are the start size, the maximum size and the general
 * context. The result of the create call is not checked here, so the caller
 * receives whatever pcre2_jit_stack_create_8() returned.
 */
static pcre2_jit_stack_8 *getstack8(void)
{
	if (stack8 != NULL)
		return stack8;

	stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
	return stack8;
}
|
|
959
|
+
|
|
960
|
+
/*
 * Attaches the cached 8-bit JIT stack to `mcontext` (creating the stack on
 * demand through getstack8()).
 *
 * Calling it with a NULL `mcontext` is the teardown path: the cached stack,
 * if any, is freed and the cache pointer is reset to NULL.
 */
static void setstack8(pcre2_match_context_8 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
		return;
	}

	/* No context given: release the cached stack if one exists. */
	if (stack8 != NULL)
		pcre2_jit_stack_free_8(stack8);
	stack8 = NULL;
}
|
|
971
|
+
#endif /* SUPPORT_PCRE2_8 */
|
|
972
|
+
|
|
973
|
+
#ifdef SUPPORT_PCRE2_16
|
|
974
|
+
/*
 * Cached 16-bit JIT stack. NULL until getstack16() creates it, and NULL
 * again after setstack16(NULL) releases it.
 */
static pcre2_jit_stack_16 *stack16;

/*
 * Returns the cached 16-bit JIT stack, creating it on first use.
 *
 * The stack is requested with the arguments (1, 1024 * 1024, NULL); per the
 * PCRE2 API these are the start size, the maximum size and the general
 * context. The result of the create call is not checked here, so the caller
 * receives whatever pcre2_jit_stack_create_16() returned.
 */
static pcre2_jit_stack_16 *getstack16(void)
{
	if (stack16 != NULL)
		return stack16;

	stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
	return stack16;
}
|
|
982
|
+
|
|
983
|
+
/*
 * Attaches the cached 16-bit JIT stack to `mcontext` (creating the stack on
 * demand through getstack16()).
 *
 * Calling it with a NULL `mcontext` is the teardown path: the cached stack,
 * if any, is freed and the cache pointer is reset to NULL.
 */
static void setstack16(pcre2_match_context_16 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
		return;
	}

	/* No context given: release the cached stack if one exists. */
	if (stack16 != NULL)
		pcre2_jit_stack_free_16(stack16);
	stack16 = NULL;
}
|
|
994
|
+
#endif /* SUPPORT_PCRE2_16 */
|
|
995
|
+
|
|
996
|
+
#ifdef SUPPORT_PCRE2_32
|
|
997
|
+
/*
 * Cached 32-bit JIT stack. NULL until getstack32() creates it, and NULL
 * again after setstack32(NULL) releases it.
 */
static pcre2_jit_stack_32 *stack32;

/*
 * Returns the cached 32-bit JIT stack, creating it on first use.
 *
 * The stack is requested with the arguments (1, 1024 * 1024, NULL); per the
 * PCRE2 API these are the start size, the maximum size and the general
 * context. The result of the create call is not checked here, so the caller
 * receives whatever pcre2_jit_stack_create_32() returned.
 */
static pcre2_jit_stack_32 *getstack32(void)
{
	if (stack32 != NULL)
		return stack32;

	stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
	return stack32;
}
|
|
1005
|
+
|
|
1006
|
+
/*
 * Attaches the cached 32-bit JIT stack to `mcontext` (creating the stack on
 * demand through getstack32()).
 *
 * Calling it with a NULL `mcontext` is the teardown path: the cached stack,
 * if any, is freed and the cache pointer is reset to NULL.
 */
static void setstack32(pcre2_match_context_32 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_32(mcontext, callback32, getstack32());
		return;
	}

	/* No context given: release the cached stack if one exists. */
	if (stack32 != NULL)
		pcre2_jit_stack_free_32(stack32);
	stack32 = NULL;
}
|
|
1017
|
+
#endif /* SUPPORT_PCRE2_32 */
|
|
1018
|
+
|
|
1019
|
+
#ifdef SUPPORT_PCRE2_16
|
|
1020
|
+
|
|
1021
|
+
/*
 * Converts the zero-terminated UTF-8 string `input` into UTF-16 code units
 * stored in `output`, followed by a terminating zero unit.
 *
 * input      - source bytes; decoding stops at the first zero byte.
 * output     - destination buffer.
 * offsetmap  - optional (may be NULL). For every decoded character the byte
 *              offset of its first input byte is stored at the index of its
 *              first output unit; the slot of a low surrogate is skipped, not
 *              written. After the loop one extra entry receives the offset
 *              at which decoding stopped.
 * max_length - capacity of `output` in units, terminator included. With a
 *              value of 0 nothing is written and 0 is returned.
 *
 * Returns the number of units written, not counting the terminator.
 *
 * The decoder does no validation: continuation bytes are masked and used as
 * they are, and bytes below 0xc0 are copied as single units.
 */
static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 src = input;
	PCRE2_UCHAR16 *dst = output;

	if (max_length == 0)
		return 0;

	while (*src && max_length > 1) {
		unsigned int lead = *src;
		unsigned int cp = 0;

		if (offsetmap != NULL)
			*offsetmap++ = (int)(src - input);

		if (lead < 0xc0) {
			cp = lead;
			src += 1;
		} else if ((lead & 0x20) == 0) {
			cp = ((src[0] & 0x1f) << 6) | (src[1] & 0x3f);
			src += 2;
		} else if ((lead & 0x10) == 0) {
			cp = ((src[0] & 0x0f) << 12) | ((src[1] & 0x3f) << 6) | (src[2] & 0x3f);
			src += 3;
		} else if ((lead & 0x08) == 0) {
			cp = ((src[0] & 0x07) << 18) | ((src[1] & 0x3f) << 12) | ((src[2] & 0x3f) << 6) | (src[3] & 0x3f);
			src += 4;
		}
		/* A lead byte of 0xf8..0xff matches no branch: cp stays 0 and src is
		   not advanced, so a zero unit is emitted for this iteration. */

		if (cp < 65536) {
			/* Fits in a single unit. */
			*dst++ = cp;
			max_length--;
			continue;
		}

		if (max_length <= 2) {
			/* No room for a surrogate pair plus the terminator: stop here. */
			*dst = '\0';
			return (int)(dst - output);
		}

		/* Encode as a high/low surrogate pair. */
		cp -= 0x10000;
		*dst++ = 0xd800 | ((cp >> 10) & 0x3ff);
		*dst++ = 0xdc00 | (cp & 0x3ff);
		max_length -= 2;
		if (offsetmap != NULL)
			offsetmap++;
	}

	if (offsetmap != NULL)
		*offsetmap = (int)(src - input);
	*dst = '\0';
	return (int)(dst - output);
}
|
|
1068
|
+
|
|
1069
|
+
/*
 * Widens the zero-terminated byte string `input` into 16-bit units, one
 * unit per byte with no decoding, and appends a terminating zero unit.
 *
 * max_length is the capacity of `output` in units, terminator included;
 * when it is 0 nothing is written and 0 is returned.
 *
 * Returns the number of units copied, not counting the terminator.
 */
static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length)
{
	int copied = 0;

	if (max_length == 0)
		return 0;

	/* Always keep one slot free for the terminator. */
	for (; input[copied] && max_length - copied > 1; copied++)
		output[copied] = input[copied];

	output[copied] = '\0';
	return copied;
}
|
|
1084
|
+
|
|
1085
|
+
#define REGTEST_MAX_LENGTH16 4096
|
|
1086
|
+
static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
|
|
1087
|
+
static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
|
|
1088
|
+
|
|
1089
|
+
#endif /* SUPPORT_PCRE2_16 */
|
|
1090
|
+
|
|
1091
|
+
#ifdef SUPPORT_PCRE2_32
|
|
1092
|
+
|
|
1093
|
+
/*
 * Converts the zero-terminated UTF-8 string `input` into 32-bit code points
 * stored in `output`, followed by a terminating zero unit.
 *
 * input      - source bytes; decoding stops at the first zero byte.
 * output     - destination buffer.
 * offsetmap  - optional (may be NULL). Entry i receives the byte offset in
 *              `input` of the character stored in output[i]; after the loop
 *              one extra entry receives the offset at which decoding stopped.
 * max_length - capacity of `output` in units, terminator included. With a
 *              value of 0 nothing is written and 0 is returned.
 *
 * Returns the number of units written, not counting the terminator.
 *
 * The decoder does no validation: continuation bytes are masked and used as
 * they are, and bytes below 0xc0 are copied as single units.
 */
static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 src = input;
	PCRE2_UCHAR32 *dst = output;

	if (max_length == 0)
		return 0;

	for (; *src && max_length > 1; max_length--) {
		unsigned int lead = *src;
		unsigned int cp = 0;

		if (offsetmap != NULL)
			*offsetmap++ = (int)(src - input);

		if (lead < 0xc0) {
			cp = lead;
			src += 1;
		} else if ((lead & 0x20) == 0) {
			cp = ((src[0] & 0x1f) << 6) | (src[1] & 0x3f);
			src += 2;
		} else if ((lead & 0x10) == 0) {
			cp = ((src[0] & 0x0f) << 12) | ((src[1] & 0x3f) << 6) | (src[2] & 0x3f);
			src += 3;
		} else if ((lead & 0x08) == 0) {
			cp = ((src[0] & 0x07) << 18) | ((src[1] & 0x3f) << 12) | ((src[2] & 0x3f) << 6) | (src[3] & 0x3f);
			src += 4;
		}
		/* A lead byte of 0xf8..0xff matches no branch: cp stays 0 and src is
		   not advanced, so a zero unit is emitted for this iteration. */

		*dst++ = cp;
	}

	if (offsetmap != NULL)
		*offsetmap = (int)(src - input);
	*dst = 0;
	return (int)(dst - output);
}
|
|
1128
|
+
|
|
1129
|
+
/*
 * Widens the zero-terminated byte string `input` into 32-bit units, one
 * unit per byte with no decoding, and appends a terminating zero unit.
 *
 * max_length is the capacity of `output` in units, terminator included;
 * when it is 0 nothing is written and 0 is returned.
 *
 * Returns the number of units copied, not counting the terminator.
 */
static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length)
{
	int copied = 0;

	if (max_length == 0)
		return 0;

	/* Always keep one slot free for the terminator. */
	for (; input[copied] && max_length - copied > 1; copied++)
		output[copied] = input[copied];

	output[copied] = '\0';
	return copied;
}
|
|
1144
|
+
|
|
1145
|
+
#define REGTEST_MAX_LENGTH32 4096
|
|
1146
|
+
static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
|
|
1147
|
+
static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
|
|
1148
|
+
|
|
1149
|
+
#endif /* SUPPORT_PCRE2_32 */
|
|
1150
|
+
|
|
1151
|
+
/*
 * Reports whether the zero-terminated string `input` consists only of
 * 7-bit ASCII bytes.
 *
 * Returns 1 when every byte before the terminator is <= 127 (an empty
 * string therefore yields 1), and 0 as soon as a byte above 127 is found.
 *
 * Bytes are inspected as unsigned char so the result does not depend on
 * whether plain char is signed. The cast keeps the const qualifier of the
 * parameter instead of dropping it.
 */
static int check_ascii(const char *input)
{
	const unsigned char *ptr = (const unsigned char *)input;

	for (; *ptr != 0; ptr++) {
		if (*ptr > 127)
			return 0;
	}
	return 1;
}
|
|
1161
|
+
|
|
1162
|
+
#define OVECTOR_SIZE 15
|
|
1163
|
+
|
|
1164
|
+
static int regression_tests(void)
|
|
1165
|
+
{
|
|
1166
|
+
struct regression_test_case *current = regression_test_cases;
|
|
1167
|
+
int error;
|
|
1168
|
+
PCRE2_SIZE err_offs;
|
|
1169
|
+
int is_successful;
|
|
1170
|
+
int is_ascii;
|
|
1171
|
+
int total = 0;
|
|
1172
|
+
int successful = 0;
|
|
1173
|
+
int successful_row = 0;
|
|
1174
|
+
int counter = 0;
|
|
1175
|
+
int jit_compile_mode;
|
|
1176
|
+
int utf = 0;
|
|
1177
|
+
uint32_t disabled_options = 0;
|
|
1178
|
+
int i;
|
|
1179
|
+
#ifdef SUPPORT_PCRE2_8
|
|
1180
|
+
pcre2_code_8 *re8;
|
|
1181
|
+
pcre2_compile_context_8 *ccontext8;
|
|
1182
|
+
pcre2_match_data_8 *mdata8_1;
|
|
1183
|
+
pcre2_match_data_8 *mdata8_2;
|
|
1184
|
+
pcre2_match_context_8 *mcontext8;
|
|
1185
|
+
PCRE2_SIZE *ovector8_1 = NULL;
|
|
1186
|
+
PCRE2_SIZE *ovector8_2 = NULL;
|
|
1187
|
+
int return_value8[2];
|
|
1188
|
+
#endif
|
|
1189
|
+
#ifdef SUPPORT_PCRE2_16
|
|
1190
|
+
pcre2_code_16 *re16;
|
|
1191
|
+
pcre2_compile_context_16 *ccontext16;
|
|
1192
|
+
pcre2_match_data_16 *mdata16_1;
|
|
1193
|
+
pcre2_match_data_16 *mdata16_2;
|
|
1194
|
+
pcre2_match_context_16 *mcontext16;
|
|
1195
|
+
PCRE2_SIZE *ovector16_1 = NULL;
|
|
1196
|
+
PCRE2_SIZE *ovector16_2 = NULL;
|
|
1197
|
+
int return_value16[2];
|
|
1198
|
+
int length16;
|
|
1199
|
+
#endif
|
|
1200
|
+
#ifdef SUPPORT_PCRE2_32
|
|
1201
|
+
pcre2_code_32 *re32;
|
|
1202
|
+
pcre2_compile_context_32 *ccontext32;
|
|
1203
|
+
pcre2_match_data_32 *mdata32_1;
|
|
1204
|
+
pcre2_match_data_32 *mdata32_2;
|
|
1205
|
+
pcre2_match_context_32 *mcontext32;
|
|
1206
|
+
PCRE2_SIZE *ovector32_1 = NULL;
|
|
1207
|
+
PCRE2_SIZE *ovector32_2 = NULL;
|
|
1208
|
+
int return_value32[2];
|
|
1209
|
+
int length32;
|
|
1210
|
+
#endif
|
|
1211
|
+
|
|
1212
|
+
#if defined SUPPORT_PCRE2_8
|
|
1213
|
+
PCRE2_UCHAR8 cpu_info[128];
|
|
1214
|
+
#elif defined SUPPORT_PCRE2_16
|
|
1215
|
+
PCRE2_UCHAR16 cpu_info[128];
|
|
1216
|
+
#elif defined SUPPORT_PCRE2_32
|
|
1217
|
+
PCRE2_UCHAR32 cpu_info[128];
|
|
1218
|
+
#endif
|
|
1219
|
+
#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
|
|
1220
|
+
int return_value;
|
|
1221
|
+
#endif
|
|
1222
|
+
|
|
1223
|
+
/* This test compares the behaviour of interpreter and JIT. Although disabling
|
|
1224
|
+
utf or ucp may make tests fail, if the pcre2_match result is the SAME, it is
|
|
1225
|
+
still considered successful from pcre2_jit_test point of view. */
|
|
1226
|
+
|
|
1227
|
+
#if defined SUPPORT_PCRE2_8
|
|
1228
|
+
pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info);
|
|
1229
|
+
#elif defined SUPPORT_PCRE2_16
|
|
1230
|
+
pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info);
|
|
1231
|
+
#elif defined SUPPORT_PCRE2_32
|
|
1232
|
+
pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info);
|
|
1233
|
+
#endif
|
|
1234
|
+
|
|
1235
|
+
printf("Running JIT regression tests\n");
|
|
1236
|
+
printf(" target CPU of SLJIT compiler: ");
|
|
1237
|
+
for (i = 0; cpu_info[i]; i++)
|
|
1238
|
+
printf("%c", (char)(cpu_info[i]));
|
|
1239
|
+
printf("\n");
|
|
1240
|
+
|
|
1241
|
+
#if defined SUPPORT_PCRE2_8
|
|
1242
|
+
pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf);
|
|
1243
|
+
#elif defined SUPPORT_PCRE2_16
|
|
1244
|
+
pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf);
|
|
1245
|
+
#elif defined SUPPORT_PCRE2_32
|
|
1246
|
+
pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf);
|
|
1247
|
+
#endif
|
|
1248
|
+
|
|
1249
|
+
if (!utf)
|
|
1250
|
+
disabled_options |= PCRE2_UTF;
|
|
1251
|
+
#ifdef SUPPORT_PCRE2_8
|
|
1252
|
+
printf(" in 8 bit mode with UTF-8 %s:\n", utf ? "enabled" : "disabled");
|
|
1253
|
+
#endif
|
|
1254
|
+
#ifdef SUPPORT_PCRE2_16
|
|
1255
|
+
printf(" in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled");
|
|
1256
|
+
#endif
|
|
1257
|
+
#ifdef SUPPORT_PCRE2_32
|
|
1258
|
+
printf(" in 32 bit mode with UTF-32 %s:\n", utf ? "enabled" : "disabled");
|
|
1259
|
+
#endif
|
|
1260
|
+
|
|
1261
|
+
while (current->pattern) {
|
|
1262
|
+
/* printf("\nPattern: %s :\n", current->pattern); */
|
|
1263
|
+
total++;
|
|
1264
|
+
is_ascii = 0;
|
|
1265
|
+
if (!(current->start_offset & F_PROPERTY))
|
|
1266
|
+
is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
|
|
1267
|
+
|
|
1268
|
+
if (current->match_options & PCRE2_PARTIAL_SOFT)
|
|
1269
|
+
jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT;
|
|
1270
|
+
else if (current->match_options & PCRE2_PARTIAL_HARD)
|
|
1271
|
+
jit_compile_mode = PCRE2_JIT_PARTIAL_HARD;
|
|
1272
|
+
else
|
|
1273
|
+
jit_compile_mode = PCRE2_JIT_COMPLETE;
|
|
1274
|
+
error = 0;
|
|
1275
|
+
#ifdef SUPPORT_PCRE2_8
|
|
1276
|
+
re8 = NULL;
|
|
1277
|
+
ccontext8 = pcre2_compile_context_create_8(NULL);
|
|
1278
|
+
if (ccontext8) {
|
|
1279
|
+
if (GET_NEWLINE(current->newline))
|
|
1280
|
+
pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline));
|
|
1281
|
+
if (GET_BSR(current->newline))
|
|
1282
|
+
pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline));
|
|
1283
|
+
|
|
1284
|
+
if (!(current->start_offset & F_NO8)) {
|
|
1285
|
+
re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED,
|
|
1286
|
+
current->compile_options & ~disabled_options,
|
|
1287
|
+
&error, &err_offs, ccontext8);
|
|
1288
|
+
|
|
1289
|
+
if (!re8 && (utf || is_ascii))
|
|
1290
|
+
printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
|
|
1291
|
+
}
|
|
1292
|
+
pcre2_compile_context_free_8(ccontext8);
|
|
1293
|
+
}
|
|
1294
|
+
else
|
|
1295
|
+
printf("\n8 bit: Cannot allocate compile context\n");
|
|
1296
|
+
#endif
|
|
1297
|
+
#ifdef SUPPORT_PCRE2_16
|
|
1298
|
+
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
|
1299
|
+
convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
|
|
1300
|
+
else
|
|
1301
|
+
copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
|
|
1302
|
+
|
|
1303
|
+
re16 = NULL;
|
|
1304
|
+
ccontext16 = pcre2_compile_context_create_16(NULL);
|
|
1305
|
+
if (ccontext16) {
|
|
1306
|
+
if (GET_NEWLINE(current->newline))
|
|
1307
|
+
pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline));
|
|
1308
|
+
if (GET_BSR(current->newline))
|
|
1309
|
+
pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline));
|
|
1310
|
+
|
|
1311
|
+
if (!(current->start_offset & F_NO16)) {
|
|
1312
|
+
re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED,
|
|
1313
|
+
current->compile_options & ~disabled_options,
|
|
1314
|
+
&error, &err_offs, ccontext16);
|
|
1315
|
+
|
|
1316
|
+
if (!re16 && (utf || is_ascii))
|
|
1317
|
+
printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
|
|
1318
|
+
}
|
|
1319
|
+
pcre2_compile_context_free_16(ccontext16);
|
|
1320
|
+
}
|
|
1321
|
+
else
|
|
1322
|
+
printf("\n16 bit: Cannot allocate compile context\n");
|
|
1323
|
+
#endif
|
|
1324
|
+
#ifdef SUPPORT_PCRE2_32
|
|
1325
|
+
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
|
1326
|
+
convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
|
|
1327
|
+
else
|
|
1328
|
+
copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
|
|
1329
|
+
|
|
1330
|
+
re32 = NULL;
|
|
1331
|
+
ccontext32 = pcre2_compile_context_create_32(NULL);
|
|
1332
|
+
if (ccontext32) {
|
|
1333
|
+
if (GET_NEWLINE(current->newline))
|
|
1334
|
+
pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline));
|
|
1335
|
+
if (GET_BSR(current->newline))
|
|
1336
|
+
pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline));
|
|
1337
|
+
|
|
1338
|
+
if (!(current->start_offset & F_NO32)) {
|
|
1339
|
+
re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED,
|
|
1340
|
+
current->compile_options & ~disabled_options,
|
|
1341
|
+
&error, &err_offs, ccontext32);
|
|
1342
|
+
|
|
1343
|
+
if (!re32 && (utf || is_ascii))
|
|
1344
|
+
printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
|
|
1345
|
+
}
|
|
1346
|
+
pcre2_compile_context_free_32(ccontext32);
|
|
1347
|
+
}
|
|
1348
|
+
else
|
|
1349
|
+
printf("\n32 bit: Cannot allocate compile context\n");
|
|
1350
|
+
#endif
|
|
1351
|
+
|
|
1352
|
+
counter++;
|
|
1353
|
+
if ((counter & 0x3) != 0) {
|
|
1354
|
+
#ifdef SUPPORT_PCRE2_8
|
|
1355
|
+
setstack8(NULL);
|
|
1356
|
+
#endif
|
|
1357
|
+
#ifdef SUPPORT_PCRE2_16
|
|
1358
|
+
setstack16(NULL);
|
|
1359
|
+
#endif
|
|
1360
|
+
#ifdef SUPPORT_PCRE2_32
|
|
1361
|
+
setstack32(NULL);
|
|
1362
|
+
#endif
|
|
1363
|
+
}
|
|
1364
|
+
|
|
1365
|
+
#ifdef SUPPORT_PCRE2_8
|
|
1366
|
+
return_value8[0] = -1000;
|
|
1367
|
+
return_value8[1] = -1000;
|
|
1368
|
+
mdata8_1 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
|
|
1369
|
+
mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
|
|
1370
|
+
mcontext8 = pcre2_match_context_create_8(NULL);
|
|
1371
|
+
if (!mdata8_1 || !mdata8_2 || !mcontext8) {
|
|
1372
|
+
printf("\n8 bit: Cannot allocate match data\n");
|
|
1373
|
+
pcre2_match_data_free_8(mdata8_1);
|
|
1374
|
+
pcre2_match_data_free_8(mdata8_2);
|
|
1375
|
+
pcre2_match_context_free_8(mcontext8);
|
|
1376
|
+
pcre2_code_free_8(re8);
|
|
1377
|
+
re8 = NULL;
|
|
1378
|
+
} else {
|
|
1379
|
+
ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
|
|
1380
|
+
ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
|
|
1381
|
+
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
|
1382
|
+
ovector8_1[i] = (PCRE2_SIZE)(-2);
|
|
1383
|
+
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
|
1384
|
+
ovector8_2[i] = (PCRE2_SIZE)(-2);
|
|
1385
|
+
pcre2_set_match_limit_8(mcontext8, 10000000);
|
|
1386
|
+
}
|
|
1387
|
+
if (re8) {
|
|
1388
|
+
return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
|
|
1389
|
+
current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
|
|
1390
|
+
|
|
1391
|
+
if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
|
|
1392
|
+
printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
|
|
1393
|
+
} else if ((counter & 0x1) != 0) {
|
|
1394
|
+
setstack8(mcontext8);
|
|
1395
|
+
return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
|
|
1396
|
+
current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
|
|
1397
|
+
} else {
|
|
1398
|
+
pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8());
|
|
1399
|
+
return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
|
|
1400
|
+
current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
|
|
1401
|
+
}
|
|
1402
|
+
}
|
|
1403
|
+
#endif
|
|
1404
|
+
|
|
1405
|
+
#ifdef SUPPORT_PCRE2_16
|
|
1406
|
+
return_value16[0] = -1000;
|
|
1407
|
+
return_value16[1] = -1000;
|
|
1408
|
+
mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
|
|
1409
|
+
mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
|
|
1410
|
+
mcontext16 = pcre2_match_context_create_16(NULL);
|
|
1411
|
+
if (!mdata16_1 || !mdata16_2 || !mcontext16) {
|
|
1412
|
+
printf("\n16 bit: Cannot allocate match data\n");
|
|
1413
|
+
pcre2_match_data_free_16(mdata16_1);
|
|
1414
|
+
pcre2_match_data_free_16(mdata16_2);
|
|
1415
|
+
pcre2_match_context_free_16(mcontext16);
|
|
1416
|
+
pcre2_code_free_16(re16);
|
|
1417
|
+
re16 = NULL;
|
|
1418
|
+
} else {
|
|
1419
|
+
ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
|
|
1420
|
+
ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
|
|
1421
|
+
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
|
1422
|
+
ovector16_1[i] = (PCRE2_SIZE)(-2);
|
|
1423
|
+
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
|
1424
|
+
ovector16_2[i] = (PCRE2_SIZE)(-2);
|
|
1425
|
+
pcre2_set_match_limit_16(mcontext16, 10000000);
|
|
1426
|
+
}
|
|
1427
|
+
if (re16) {
|
|
1428
|
+
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
|
1429
|
+
length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
|
|
1430
|
+
else
|
|
1431
|
+
length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
|
|
1432
|
+
|
|
1433
|
+
return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
|
|
1434
|
+
current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
|
|
1435
|
+
|
|
1436
|
+
if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
|
|
1437
|
+
printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
|
|
1438
|
+
} else if ((counter & 0x1) != 0) {
|
|
1439
|
+
setstack16(mcontext16);
|
|
1440
|
+
return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16,
|
|
1441
|
+
current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
|
|
1442
|
+
} else {
|
|
1443
|
+
pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16());
|
|
1444
|
+
return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16,
|
|
1445
|
+
current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
|
|
1446
|
+
}
|
|
1447
|
+
}
|
|
1448
|
+
#endif
|
|
1449
|
+
|
|
1450
|
+
#ifdef SUPPORT_PCRE2_32
|
|
1451
|
+
return_value32[0] = -1000;
|
|
1452
|
+
return_value32[1] = -1000;
|
|
1453
|
+
mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
|
|
1454
|
+
mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
|
|
1455
|
+
mcontext32 = pcre2_match_context_create_32(NULL);
|
|
1456
|
+
if (!mdata32_1 || !mdata32_2 || !mcontext32) {
|
|
1457
|
+
printf("\n32 bit: Cannot allocate match data\n");
|
|
1458
|
+
pcre2_match_data_free_32(mdata32_1);
|
|
1459
|
+
pcre2_match_data_free_32(mdata32_2);
|
|
1460
|
+
pcre2_match_context_free_32(mcontext32);
|
|
1461
|
+
pcre2_code_free_32(re32);
|
|
1462
|
+
re32 = NULL;
|
|
1463
|
+
} else {
|
|
1464
|
+
ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
|
|
1465
|
+
ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
|
|
1466
|
+
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
|
1467
|
+
ovector32_1[i] = (PCRE2_SIZE)(-2);
|
|
1468
|
+
for (i = 0; i < OVECTOR_SIZE * 2; ++i)
|
|
1469
|
+
ovector32_2[i] = (PCRE2_SIZE)(-2);
|
|
1470
|
+
pcre2_set_match_limit_32(mcontext32, 10000000);
|
|
1471
|
+
}
|
|
1472
|
+
if (re32) {
|
|
1473
|
+
if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
|
|
1474
|
+
length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
|
|
1475
|
+
else
|
|
1476
|
+
length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
|
|
1477
|
+
|
|
1478
|
+
return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
|
|
1479
|
+
current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
|
|
1480
|
+
|
|
1481
|
+
if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
|
|
1482
|
+
printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
|
|
1483
|
+
} else if ((counter & 0x1) != 0) {
|
|
1484
|
+
setstack32(mcontext32);
|
|
1485
|
+
return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32,
|
|
1486
|
+
current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
|
|
1487
|
+
} else {
|
|
1488
|
+
pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32());
|
|
1489
|
+
return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32,
|
|
1490
|
+
current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
|
|
1491
|
+
}
|
|
1492
|
+
}
|
|
1493
|
+
#endif
|
|
1494
|
+
|
|
1495
|
+
/* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
|
|
1496
|
+
return_value8[0], return_value16[0], return_value32[0],
|
|
1497
|
+
(int)ovector8_1[0], (int)ovector8_1[1],
|
|
1498
|
+
(int)ovector16_1[0], (int)ovector16_1[1],
|
|
1499
|
+
(int)ovector32_1[0], (int)ovector32_1[1],
|
|
1500
|
+
(current->compile_options & PCRE2_CASELESS) ? "C" : ""); */
|
|
1501
|
+
|
|
1502
|
+
/* If F_DIFF is set, just run the test, but do not compare the results.
|
|
1503
|
+
Segfaults can still be captured. */
|
|
1504
|
+
|
|
1505
|
+
is_successful = 1;
|
|
1506
|
+
if (!(current->start_offset & F_DIFF)) {
|
|
1507
|
+
#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
|
|
1508
|
+
if (!(current->start_offset & F_FORCECONV)) {
|
|
1509
|
+
|
|
1510
|
+
/* All results must be the same. */
|
|
1511
|
+
#ifdef SUPPORT_PCRE2_8
|
|
1512
|
+
if ((return_value = return_value8[0]) != return_value8[1]) {
|
|
1513
|
+
printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
|
|
1514
|
+
return_value8[0], return_value8[1], total, current->pattern, current->input);
|
|
1515
|
+
is_successful = 0;
|
|
1516
|
+
} else
|
|
1517
|
+
#endif
|
|
1518
|
+
#ifdef SUPPORT_PCRE2_16
|
|
1519
|
+
if ((return_value = return_value16[0]) != return_value16[1]) {
|
|
1520
|
+
printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
|
|
1521
|
+
return_value16[0], return_value16[1], total, current->pattern, current->input);
|
|
1522
|
+
is_successful = 0;
|
|
1523
|
+
} else
|
|
1524
|
+
#endif
|
|
1525
|
+
#ifdef SUPPORT_PCRE2_32
|
|
1526
|
+
if ((return_value = return_value32[0]) != return_value32[1]) {
|
|
1527
|
+
printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
|
|
1528
|
+
return_value32[0], return_value32[1], total, current->pattern, current->input);
|
|
1529
|
+
is_successful = 0;
|
|
1530
|
+
} else
|
|
1531
|
+
#endif
|
|
1532
|
+
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
|
|
1533
|
+
if (return_value8[0] != return_value16[0]) {
|
|
1534
|
+
printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
|
|
1535
|
+
return_value8[0], return_value16[0],
|
|
1536
|
+
total, current->pattern, current->input);
|
|
1537
|
+
is_successful = 0;
|
|
1538
|
+
} else
|
|
1539
|
+
#endif
|
|
1540
|
+
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
|
|
1541
|
+
if (return_value8[0] != return_value32[0]) {
|
|
1542
|
+
printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
|
|
1543
|
+
return_value8[0], return_value32[0],
|
|
1544
|
+
total, current->pattern, current->input);
|
|
1545
|
+
is_successful = 0;
|
|
1546
|
+
} else
|
|
1547
|
+
#endif
|
|
1548
|
+
#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
|
|
1549
|
+
if (return_value16[0] != return_value32[0]) {
|
|
1550
|
+
printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
|
|
1551
|
+
return_value16[0], return_value32[0],
|
|
1552
|
+
total, current->pattern, current->input);
|
|
1553
|
+
is_successful = 0;
|
|
1554
|
+
} else
|
|
1555
|
+
#endif
|
|
1556
|
+
if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) {
|
|
1557
|
+
if (return_value == PCRE2_ERROR_PARTIAL) {
|
|
1558
|
+
return_value = 2;
|
|
1559
|
+
} else {
|
|
1560
|
+
return_value *= 2;
|
|
1561
|
+
}
|
|
1562
|
+
#ifdef SUPPORT_PCRE2_8
|
|
1563
|
+
return_value8[0] = return_value;
|
|
1564
|
+
#endif
|
|
1565
|
+
#ifdef SUPPORT_PCRE2_16
|
|
1566
|
+
return_value16[0] = return_value;
|
|
1567
|
+
#endif
|
|
1568
|
+
#ifdef SUPPORT_PCRE2_32
|
|
1569
|
+
return_value32[0] = return_value;
|
|
1570
|
+
#endif
|
|
1571
|
+
/* Transform back the results. */
|
|
1572
|
+
if (current->compile_options & PCRE2_UTF) {
|
|
1573
|
+
#ifdef SUPPORT_PCRE2_16
|
|
1574
|
+
for (i = 0; i < return_value; ++i) {
|
|
1575
|
+
if (ovector16_1[i] != PCRE2_UNSET)
|
|
1576
|
+
ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
|
|
1577
|
+
if (ovector16_2[i] != PCRE2_UNSET)
|
|
1578
|
+
ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
|
|
1579
|
+
}
|
|
1580
|
+
#endif
|
|
1581
|
+
#ifdef SUPPORT_PCRE2_32
|
|
1582
|
+
for (i = 0; i < return_value; ++i) {
|
|
1583
|
+
if (ovector32_1[i] != PCRE2_UNSET)
|
|
1584
|
+
ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
|
|
1585
|
+
if (ovector32_2[i] != PCRE2_UNSET)
|
|
1586
|
+
ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
|
|
1587
|
+
}
|
|
1588
|
+
#endif
|
|
1589
|
+
}
|
|
1590
|
+
|
|
1591
|
+
for (i = 0; i < return_value; ++i) {
|
|
1592
|
+
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
|
|
1593
|
+
if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
|
|
1594
|
+
printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
|
|
1595
|
+
i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
|
|
1596
|
+
total, current->pattern, current->input);
|
|
1597
|
+
is_successful = 0;
|
|
1598
|
+
}
|
|
1599
|
+
#endif
|
|
1600
|
+
#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
|
|
1601
|
+
if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
|
|
1602
|
+
printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
|
|
1603
|
+
i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
|
|
1604
|
+
total, current->pattern, current->input);
|
|
1605
|
+
is_successful = 0;
|
|
1606
|
+
}
|
|
1607
|
+
#endif
|
|
1608
|
+
#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
|
|
1609
|
+
if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
|
|
1610
|
+
printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
|
|
1611
|
+
i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
|
|
1612
|
+
total, current->pattern, current->input);
|
|
1613
|
+
is_successful = 0;
|
|
1614
|
+
}
|
|
1615
|
+
#endif
|
|
1616
|
+
}
|
|
1617
|
+
}
|
|
1618
|
+
} else
|
|
1619
|
+
#endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */
|
|
1620
|
+
{
|
|
1621
|
+
#ifdef SUPPORT_PCRE2_8
|
|
1622
|
+
if (return_value8[0] != return_value8[1]) {
|
|
1623
|
+
printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
|
1624
|
+
return_value8[0], return_value8[1], total, current->pattern, current->input);
|
|
1625
|
+
is_successful = 0;
|
|
1626
|
+
} else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) {
|
|
1627
|
+
if (return_value8[0] == PCRE2_ERROR_PARTIAL)
|
|
1628
|
+
return_value8[0] = 2;
|
|
1629
|
+
else
|
|
1630
|
+
return_value8[0] *= 2;
|
|
1631
|
+
|
|
1632
|
+
for (i = 0; i < return_value8[0]; ++i)
|
|
1633
|
+
if (ovector8_1[i] != ovector8_2[i]) {
|
|
1634
|
+
printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
|
1635
|
+
i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input);
|
|
1636
|
+
is_successful = 0;
|
|
1637
|
+
}
|
|
1638
|
+
}
|
|
1639
|
+
#endif
|
|
1640
|
+
|
|
1641
|
+
#ifdef SUPPORT_PCRE2_16
|
|
1642
|
+
if (return_value16[0] != return_value16[1]) {
|
|
1643
|
+
printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
|
1644
|
+
return_value16[0], return_value16[1], total, current->pattern, current->input);
|
|
1645
|
+
is_successful = 0;
|
|
1646
|
+
} else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) {
|
|
1647
|
+
if (return_value16[0] == PCRE2_ERROR_PARTIAL)
|
|
1648
|
+
return_value16[0] = 2;
|
|
1649
|
+
else
|
|
1650
|
+
return_value16[0] *= 2;
|
|
1651
|
+
|
|
1652
|
+
for (i = 0; i < return_value16[0]; ++i)
|
|
1653
|
+
if (ovector16_1[i] != ovector16_2[i]) {
|
|
1654
|
+
printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
|
1655
|
+
i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input);
|
|
1656
|
+
is_successful = 0;
|
|
1657
|
+
}
|
|
1658
|
+
}
|
|
1659
|
+
#endif
|
|
1660
|
+
|
|
1661
|
+
#ifdef SUPPORT_PCRE2_32
|
|
1662
|
+
if (return_value32[0] != return_value32[1]) {
|
|
1663
|
+
printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
|
1664
|
+
return_value32[0], return_value32[1], total, current->pattern, current->input);
|
|
1665
|
+
is_successful = 0;
|
|
1666
|
+
} else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) {
|
|
1667
|
+
if (return_value32[0] == PCRE2_ERROR_PARTIAL)
|
|
1668
|
+
return_value32[0] = 2;
|
|
1669
|
+
else
|
|
1670
|
+
return_value32[0] *= 2;
|
|
1671
|
+
|
|
1672
|
+
for (i = 0; i < return_value32[0]; ++i)
|
|
1673
|
+
if (ovector32_1[i] != ovector32_2[i]) {
|
|
1674
|
+
printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
|
|
1675
|
+
i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input);
|
|
1676
|
+
is_successful = 0;
|
|
1677
|
+
}
|
|
1678
|
+
}
|
|
1679
|
+
#endif
|
|
1680
|
+
}
|
|
1681
|
+
}
|
|
1682
|
+
|
|
1683
|
+
if (is_successful) {
|
|
1684
|
+
#ifdef SUPPORT_PCRE2_8
|
|
1685
|
+
if (!(current->start_offset & F_NO8) && (utf || is_ascii)) {
|
|
1686
|
+
if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
|
|
1687
|
+
printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
|
|
1688
|
+
total, current->pattern, current->input);
|
|
1689
|
+
is_successful = 0;
|
|
1690
|
+
}
|
|
1691
|
+
|
|
1692
|
+
if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
|
|
1693
|
+
printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
|
|
1694
|
+
total, current->pattern, current->input);
|
|
1695
|
+
is_successful = 0;
|
|
1696
|
+
}
|
|
1697
|
+
}
|
|
1698
|
+
#endif
|
|
1699
|
+
#ifdef SUPPORT_PCRE2_16
|
|
1700
|
+
if (!(current->start_offset & F_NO16) && (utf || is_ascii)) {
|
|
1701
|
+
if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
|
|
1702
|
+
printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
|
|
1703
|
+
total, current->pattern, current->input);
|
|
1704
|
+
is_successful = 0;
|
|
1705
|
+
}
|
|
1706
|
+
|
|
1707
|
+
if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
|
|
1708
|
+
printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
|
|
1709
|
+
total, current->pattern, current->input);
|
|
1710
|
+
is_successful = 0;
|
|
1711
|
+
}
|
|
1712
|
+
}
|
|
1713
|
+
#endif
|
|
1714
|
+
#ifdef SUPPORT_PCRE2_32
|
|
1715
|
+
if (!(current->start_offset & F_NO32) && (utf || is_ascii)) {
|
|
1716
|
+
if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
|
|
1717
|
+
printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
|
|
1718
|
+
total, current->pattern, current->input);
|
|
1719
|
+
is_successful = 0;
|
|
1720
|
+
}
|
|
1721
|
+
|
|
1722
|
+
if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
|
|
1723
|
+
printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
|
|
1724
|
+
total, current->pattern, current->input);
|
|
1725
|
+
is_successful = 0;
|
|
1726
|
+
}
|
|
1727
|
+
}
|
|
1728
|
+
#endif
|
|
1729
|
+
}
|
|
1730
|
+
|
|
1731
|
+
if (is_successful) {
|
|
1732
|
+
#ifdef SUPPORT_PCRE2_8
|
|
1733
|
+
if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) {
|
|
1734
|
+
printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
|
|
1735
|
+
total, current->pattern, current->input);
|
|
1736
|
+
is_successful = 0;
|
|
1737
|
+
}
|
|
1738
|
+
#endif
|
|
1739
|
+
#ifdef SUPPORT_PCRE2_16
|
|
1740
|
+
if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) {
|
|
1741
|
+
printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
|
|
1742
|
+
total, current->pattern, current->input);
|
|
1743
|
+
is_successful = 0;
|
|
1744
|
+
}
|
|
1745
|
+
#endif
|
|
1746
|
+
#ifdef SUPPORT_PCRE2_32
|
|
1747
|
+
if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) {
|
|
1748
|
+
printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
|
|
1749
|
+
total, current->pattern, current->input);
|
|
1750
|
+
is_successful = 0;
|
|
1751
|
+
}
|
|
1752
|
+
#endif
|
|
1753
|
+
}
|
|
1754
|
+
|
|
1755
|
+
#ifdef SUPPORT_PCRE2_8
|
|
1756
|
+
pcre2_code_free_8(re8);
|
|
1757
|
+
pcre2_match_data_free_8(mdata8_1);
|
|
1758
|
+
pcre2_match_data_free_8(mdata8_2);
|
|
1759
|
+
pcre2_match_context_free_8(mcontext8);
|
|
1760
|
+
#endif
|
|
1761
|
+
#ifdef SUPPORT_PCRE2_16
|
|
1762
|
+
pcre2_code_free_16(re16);
|
|
1763
|
+
pcre2_match_data_free_16(mdata16_1);
|
|
1764
|
+
pcre2_match_data_free_16(mdata16_2);
|
|
1765
|
+
pcre2_match_context_free_16(mcontext16);
|
|
1766
|
+
#endif
|
|
1767
|
+
#ifdef SUPPORT_PCRE2_32
|
|
1768
|
+
pcre2_code_free_32(re32);
|
|
1769
|
+
pcre2_match_data_free_32(mdata32_1);
|
|
1770
|
+
pcre2_match_data_free_32(mdata32_2);
|
|
1771
|
+
pcre2_match_context_free_32(mcontext32);
|
|
1772
|
+
#endif
|
|
1773
|
+
|
|
1774
|
+
if (is_successful) {
|
|
1775
|
+
successful++;
|
|
1776
|
+
successful_row++;
|
|
1777
|
+
printf(".");
|
|
1778
|
+
if (successful_row >= 60) {
|
|
1779
|
+
successful_row = 0;
|
|
1780
|
+
printf("\n");
|
|
1781
|
+
}
|
|
1782
|
+
} else
|
|
1783
|
+
successful_row = 0;
|
|
1784
|
+
|
|
1785
|
+
fflush(stdout);
|
|
1786
|
+
current++;
|
|
1787
|
+
}
|
|
1788
|
+
#ifdef SUPPORT_PCRE2_8
|
|
1789
|
+
setstack8(NULL);
|
|
1790
|
+
#endif
|
|
1791
|
+
#ifdef SUPPORT_PCRE2_16
|
|
1792
|
+
setstack16(NULL);
|
|
1793
|
+
#endif
|
|
1794
|
+
#ifdef SUPPORT_PCRE2_32
|
|
1795
|
+
setstack32(NULL);
|
|
1796
|
+
#endif
|
|
1797
|
+
|
|
1798
|
+
if (total == successful) {
|
|
1799
|
+
printf("\nAll JIT regression tests are successfully passed.\n");
|
|
1800
|
+
return 0;
|
|
1801
|
+
} else {
|
|
1802
|
+
printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
|
1803
|
+
return 1;
|
|
1804
|
+
}
|
|
1805
|
+
}
|
|
1806
|
+
|
|
1807
|
+
#if defined SUPPORT_UNICODE
|
|
1808
|
+
|
|
1809
|
+
/*
 * Check the outcome of one invalid-UTF match attempt against the span the
 * test case expects, printing a diagnostic for the first disagreement.
 *
 *   pattern_index  index of the test case; used only in the diagnostics
 *   type           label inserted into the diagnostics
 *   result         return value of the match call being checked
 *   match_start    expected ovector[0]; when negative, result must be
 *                  exactly -1 and ovector is never read
 *   match_end      expected ovector[1]
 *   ovector        offsets produced by the match; read only when
 *                  match_start >= 0 and result > 0
 *
 * Returns 0 when everything agrees, 1 after reporting a mismatch.
 */
static int check_invalid_utf_result(int pattern_index, const char *type, int result,
	int match_start, int match_end, PCRE2_SIZE *ovector)
{
	if (match_start >= 0) {
		/* A match is expected: the call must report success and both
		   ends of the overall match must sit at the expected offsets.
		   The checks run in order and stop at the first failure. */
		if (result <= 0) {
			printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result);
		} else if (ovector[0] != (PCRE2_SIZE)match_start) {
			printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
				pattern_index, type, (int)ovector[0], match_start);
		} else if (ovector[1] != (PCRE2_SIZE)match_end) {
			printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
				pattern_index, type, (int)ovector[1], match_end);
		} else {
			return 0;
		}
		return 1;
	}

	/* Negative match_start: the only acceptable result is -1
	   (presumably PCRE2_ERROR_NOMATCH; confirm against pcre2.h). */
	if (result == -1)
		return 0;

	printf("Pattern[%d] %s result is not -1.\n", pattern_index, type);
	return 1;
}
|
|
1839
|
+
|
|
1840
|
+
#endif /* SUPPORT_UNICODE */
|
|
1841
|
+
|
|
1842
|
+
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
|
|
1843
|
+
|
|
1844
|
+
/* Option masks used as shorthand in the invalid UTF-8 test table rows.
   UDA goes in the compile_options column, CI / CPI in jit_compile_options. */
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
/* CPI is CI with PCRE2_JIT_PARTIAL_SOFT added. */
#define CPI (CI | PCRE2_JIT_PARTIAL_SOFT)
|
|
1847
|
+
|
|
1848
|
+
/*
 * One row of the invalid UTF-8 regression table.
 *
 * Rows are written as positional initializers, so the order and types of
 * the members below must stay exactly as they are.
 */
struct invalid_utf8_regression_test_case {
	uint32_t compile_options;	/* compile option mask (the rows pass UDA) */
	int jit_compile_options;	/* JIT option mask (the rows pass CI) */
	int start_offset;
	/* NOTE(review): the consumer of skip_left / skip_right is not visible
	   here; presumably they trim the input on each side. Confirm. */
	int skip_left, skip_right;
	/* Expected match span; the rows use -1, -1 when no match is expected. */
	int match_start, match_end;
	const char *pattern[2];		/* second slot may be NULL */
	const char *input;		/* raw subject bytes, possibly ill-formed UTF-8 */
};
|
|
1859
|
+
|
|
1860
|
+
/* File-scope const char declared without an initializer (tentative definition).
   NOTE(review): presumably referenced by address as a marker in the test
   table that follows, not for its value. Confirm against its users. */
static const char invalid_utf8_newline_cr;
|
|
1861
|
+
|
|
1862
|
+
static const struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
|
|
1863
|
+
{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
|
|
1864
|
+
{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" },
|
|
1865
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
|
|
1866
|
+
{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
|
|
1867
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
|
|
1868
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
|
|
1869
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
|
|
1870
|
+
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" },
|
|
1871
|
+
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" },
|
|
1872
|
+
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" },
|
|
1873
|
+
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" },
|
|
1874
|
+
{ UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" },
|
|
1875
|
+
{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" },
|
|
1876
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" },
|
|
1877
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" },
|
|
1878
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
|
|
1879
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" },
|
|
1880
|
+
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" },
|
|
1881
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" },
|
|
1882
|
+
{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" },
|
|
1883
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" },
|
|
1884
|
+
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" },
|
|
1885
|
+
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" },
|
|
1886
|
+
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" },
|
|
1887
|
+
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" },
|
|
1888
|
+
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" },
|
|
1889
|
+
{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" },
|
|
1890
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" },
|
|
1891
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" },
|
|
1892
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" },
|
|
1893
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" },
|
|
1894
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" },
|
|
1895
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" },
|
|
1896
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" },
|
|
1897
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" },
|
|
1898
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" },
|
|
1899
|
+
{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" },
|
|
1900
|
+
{ UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
|
|
1901
|
+
|
|
1902
|
+
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
|
|
1903
|
+
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80\xf4\xa0\x80\x80" },
|
|
1904
|
+
{ UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" },
|
|
1905
|
+
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
|
|
1906
|
+
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
|
|
1907
|
+
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
|
|
1908
|
+
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
|
|
1909
|
+
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" },
|
|
1910
|
+
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" },
|
|
1911
|
+
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" },
|
|
1912
|
+
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" },
|
|
1913
|
+
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" },
|
|
1914
|
+
{ UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" },
|
|
1915
|
+
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" },
|
|
1916
|
+
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" },
|
|
1917
|
+
{ UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" },
|
|
1918
|
+
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" },
|
|
1919
|
+
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" },
|
|
1920
|
+
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
|
|
1921
|
+
{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
|
|
1922
|
+
{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
|
|
1923
|
+
{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
|
|
1924
|
+
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" },
|
|
1925
|
+
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" },
|
|
1926
|
+
{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" },
|
|
1927
|
+
|
|
1928
|
+
{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
|
|
1929
|
+
{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
|
|
1930
|
+
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf\xe0\x9f\xbf" },
|
|
1931
|
+
{ UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" },
|
|
1932
|
+
{ UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" },
|
|
1933
|
+
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" },
|
|
1934
|
+
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" },
|
|
1935
|
+
{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" },
|
|
1936
|
+
|
|
1937
|
+
{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
|
|
1938
|
+
{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
|
|
1939
|
+
{ UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" },
|
|
1940
|
+
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" },
|
|
1941
|
+
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" },
|
|
1942
|
+
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" },
|
|
1943
|
+
{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" },
|
|
1944
|
+
|
|
1945
|
+
{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
|
|
1946
|
+
{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
|
|
1947
|
+
{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" },
|
|
1948
|
+
{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" },
|
|
1949
|
+
|
|
1950
|
+
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
|
|
1951
|
+
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
|
|
1952
|
+
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
|
|
1953
|
+
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
|
|
1954
|
+
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
|
|
1955
|
+
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
|
|
1956
|
+
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
|
|
1957
|
+
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
|
|
1958
|
+
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
|
|
1959
|
+
|
|
1960
|
+
{ UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" },
|
|
1961
|
+
{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" },
|
|
1962
|
+
{ UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" },
|
|
1963
|
+
{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" },
|
|
1964
|
+
{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" },
|
|
1965
|
+
{ UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" },
|
|
1966
|
+
{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
|
|
1967
|
+
{ UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" },
|
|
1968
|
+
{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
|
|
1969
|
+
|
|
1970
|
+
{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" },
|
|
1971
|
+
{ UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" },
|
|
1972
|
+
{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" },
|
|
1973
|
+
{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" },
|
|
1974
|
+
|
|
1975
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"},
|
|
1976
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
|
|
1977
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"},
|
|
1978
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"},
|
|
1979
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"},
|
|
1980
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"},
|
|
1981
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"},
|
|
1982
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"},
|
|
1983
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"},
|
|
1984
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"},
|
|
1985
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"},
|
|
1986
|
+
|
|
1987
|
+
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"},
|
|
1988
|
+
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"},
|
|
1989
|
+
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"},
|
|
1990
|
+
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"},
|
|
1991
|
+
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"},
|
|
1992
|
+
{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"},
|
|
1993
|
+
|
|
1994
|
+
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
|
|
1995
|
+
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
|
|
1996
|
+
{ PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
|
|
1997
|
+
{ PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
|
|
1998
|
+
|
|
1999
|
+
{ PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" },
|
|
2000
|
+
{ PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "[\\D]", NULL }, "\xe0\xab\xaa@" },
|
|
2001
|
+
{ PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "\\D+", NULL }, "n\xc3\xb1" },
|
|
2002
|
+
{ PCRE2_UTF, CI, 0, 0, 0, 0, 5, { "\\W+", NULL }, "@\xf0\x9d\x84\x9e" },
|
|
2003
|
+
|
|
2004
|
+
/* These two are not invalid UTF tests, but this infrastructure fits better for them. */
|
|
2005
|
+
{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
|
|
2006
|
+
{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" },
|
|
2007
|
+
|
|
2008
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 0, 0, 0, -1, -1, { "^.a", &invalid_utf8_newline_cr }, "\xc3\xa7#a" },
|
|
2009
|
+
|
|
2010
|
+
{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
|
2011
|
+
};
|
|
2012
|
+
|
|
2013
|
+
#undef UDA
|
|
2014
|
+
#undef CI
|
|
2015
|
+
#undef CPI
|
|
2016
|
+
|
|
2017
|
+
/*
 * Compile pattern[i] of one invalid UTF-8 test case, JIT compile it, and run
 * it against the (optionally trimmed) input of the test case.
 *
 * current        test case to run
 * pattern_index  index of the test case, used in the diagnostics
 * i              which pattern slot of the test case to use (0 or 1)
 * ccontext       compile context passed to pcre2_compile_8()
 * mdata          match data block whose ovector receives the offsets
 *
 * Returns 1 when the pattern slot is NULL or when every requested match mode
 * gave the expected outcome, 0 otherwise. The compiled code is always freed.
 */
static int run_invalid_utf8_test(const struct invalid_utf8_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata)
{
	pcre2_code_8 *code;
	int result, errorcode;
	PCRE2_SIZE length, erroroffset;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata);

	/* An empty slot is not an error: there is simply nothing to run. */
	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Print the real slot number: a failure of pattern[1] must not be reported as ":0". */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		pcre2_code_free_8(code);
		return 0;
	}

	/* skip_left / skip_right trim code units from both ends of the input. */
	length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		/* start_offset refers to the untrimmed input, so it is rebased here. */
		result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_8(code);
			return 0;
		}
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_8(code);
			return 0;
		}
	}

	pcre2_code_free_8(code);
	return 1;
}
|
|
2067
|
+
|
|
2068
|
+
static int invalid_utf8_regression_tests(void)
|
|
2069
|
+
{
|
|
2070
|
+
const struct invalid_utf8_regression_test_case *current;
|
|
2071
|
+
pcre2_compile_context_8 *ccontext;
|
|
2072
|
+
pcre2_match_data_8 *mdata;
|
|
2073
|
+
int total = 0, successful = 0;
|
|
2074
|
+
int result;
|
|
2075
|
+
|
|
2076
|
+
printf("\nRunning invalid-utf8 JIT regression tests\n");
|
|
2077
|
+
|
|
2078
|
+
ccontext = pcre2_compile_context_create_8(NULL);
|
|
2079
|
+
pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
|
|
2080
|
+
mdata = pcre2_match_data_create_8(4, NULL);
|
|
2081
|
+
|
|
2082
|
+
for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
|
|
2083
|
+
/* printf("\nPattern: %s :\n", current->pattern); */
|
|
2084
|
+
total++;
|
|
2085
|
+
|
|
2086
|
+
result = 1;
|
|
2087
|
+
if (current->pattern[1] != &invalid_utf8_newline_cr)
|
|
2088
|
+
{
|
|
2089
|
+
if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
|
|
2090
|
+
result = 0;
|
|
2091
|
+
if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
|
|
2092
|
+
result = 0;
|
|
2093
|
+
} else {
|
|
2094
|
+
pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_CR);
|
|
2095
|
+
if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
|
|
2096
|
+
result = 0;
|
|
2097
|
+
pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
|
|
2098
|
+
}
|
|
2099
|
+
|
|
2100
|
+
if (result) {
|
|
2101
|
+
successful++;
|
|
2102
|
+
}
|
|
2103
|
+
|
|
2104
|
+
printf(".");
|
|
2105
|
+
if ((total % 60) == 0)
|
|
2106
|
+
printf("\n");
|
|
2107
|
+
}
|
|
2108
|
+
|
|
2109
|
+
if ((total % 60) != 0)
|
|
2110
|
+
printf("\n");
|
|
2111
|
+
|
|
2112
|
+
pcre2_match_data_free_8(mdata);
|
|
2113
|
+
pcre2_compile_context_free_8(ccontext);
|
|
2114
|
+
|
|
2115
|
+
if (total == successful) {
|
|
2116
|
+
printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
|
|
2117
|
+
return 0;
|
|
2118
|
+
} else {
|
|
2119
|
+
printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
|
2120
|
+
return 1;
|
|
2121
|
+
}
|
|
2122
|
+
}
|
|
2123
|
+
|
|
2124
|
+
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
|
|
2125
|
+
|
|
2126
|
+
/*
 * Stub used when Unicode support or the 8-bit library is not compiled in:
 * there is nothing to run, so success (0) is reported.
 */
static int invalid_utf8_regression_tests(void)
{
	return 0;
}
|
|
2130
|
+
|
|
2131
|
+
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
|
|
2132
|
+
|
|
2133
|
+
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
|
|
2134
|
+
|
|
2135
|
+
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
|
|
2136
|
+
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
|
|
2137
|
+
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
|
|
2138
|
+
|
|
2139
|
+
struct invalid_utf16_regression_test_case {
|
|
2140
|
+
uint32_t compile_options;
|
|
2141
|
+
int jit_compile_options;
|
|
2142
|
+
int start_offset;
|
|
2143
|
+
int skip_left;
|
|
2144
|
+
int skip_right;
|
|
2145
|
+
int match_start;
|
|
2146
|
+
int match_end;
|
|
2147
|
+
const PCRE2_UCHAR16 *pattern[2];
|
|
2148
|
+
const PCRE2_UCHAR16 *input;
|
|
2149
|
+
};
|
|
2150
|
+
|
|
2151
|
+
static PCRE2_UCHAR16 allany16[] = { '.', 0 };
|
|
2152
|
+
static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 };
|
|
2153
|
+
static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 };
|
|
2154
|
+
static PCRE2_UCHAR16 backreference16[] = { '(', '.', ')', '\\', '1', 0 };
|
|
2155
|
+
static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 };
|
|
2156
|
+
static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 };
|
|
2157
|
+
static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 };
|
|
2158
|
+
static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };
|
|
2159
|
+
static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
|
|
2160
|
+
static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 };
|
|
2161
|
+
static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 };
|
|
2162
|
+
static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 };
|
|
2163
|
+
static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 };
|
|
2164
|
+
static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
|
|
2165
|
+
static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
|
|
2166
|
+
static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
|
|
2167
|
+
static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 };
|
|
2168
|
+
static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 };
|
|
2169
|
+
static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
|
|
2170
|
+
static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
|
|
2171
|
+
|
|
2172
|
+
static const struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
|
|
2173
|
+
{ UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 },
|
|
2174
|
+
{ UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 },
|
|
2175
|
+
{ UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
|
|
2176
|
+
{ UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
|
|
2177
|
+
{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
|
|
2178
|
+
{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 },
|
|
2179
|
+
{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 },
|
|
2180
|
+
{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
|
|
2181
|
+
{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 },
|
|
2182
|
+
{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },
|
|
2183
|
+
|
|
2184
|
+
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
|
|
2185
|
+
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 },
|
|
2186
|
+
{ UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 },
|
|
2187
|
+
{ UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
|
|
2188
|
+
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
|
|
2189
|
+
{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
|
|
2190
|
+
{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
|
|
2191
|
+
{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
|
|
2192
|
+
{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
|
|
2193
|
+
{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },
|
|
2194
|
+
|
|
2195
|
+
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 },
|
|
2196
|
+
{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 },
|
|
2197
|
+
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 },
|
|
2198
|
+
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 },
|
|
2199
|
+
|
|
2200
|
+
{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 },
|
|
2201
|
+
{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 },
|
|
2202
|
+
{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 },
|
|
2203
|
+
{ UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 },
|
|
2204
|
+
{ UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 },
|
|
2205
|
+
{ UDA, CPI, 1, 0, 0, -1, -1, { grapheme16, NULL }, test16_7 },
|
|
2206
|
+
|
|
2207
|
+
{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
|
|
2208
|
+
{ UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 },
|
|
2209
|
+
{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
|
|
2210
|
+
|
|
2211
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 },
|
|
2212
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, test16_10 },
|
|
2213
|
+
|
|
2214
|
+
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
|
|
2215
|
+
{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
|
|
2216
|
+
{ PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
|
|
2217
|
+
{ PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
|
|
2218
|
+
|
|
2219
|
+
{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
|
2220
|
+
};
|
|
2221
|
+
|
|
2222
|
+
#undef UDA
|
|
2223
|
+
#undef CI
|
|
2224
|
+
#undef CPI
|
|
2225
|
+
|
|
2226
|
+
/*
 * Compile pattern[i] of one invalid UTF-16 test case, JIT compile it, and run
 * it against the (optionally trimmed) input of the test case.
 *
 * current        test case to run
 * pattern_index  index of the test case, used in the diagnostics
 * i              which pattern slot of the test case to use (0 or 1)
 * ccontext       compile context passed to pcre2_compile_16()
 * mdata          match data block whose ovector receives the offsets
 *
 * Returns 1 when the pattern slot is NULL or when every requested match mode
 * gave the expected outcome, 0 otherwise. The compiled code is always freed.
 */
static int run_invalid_utf16_test(const struct invalid_utf16_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata)
{
	pcre2_code_16 *code;
	int result, errorcode;
	PCRE2_SIZE length, erroroffset;
	const PCRE2_UCHAR16 *input;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata);

	/* An empty slot is not an error: there is simply nothing to run. */
	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Print the real slot number: a failure of pattern[1] must not be reported as ":0". */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		pcre2_code_free_16(code);
		return 0;
	}

	/* Count the code units of the zero terminated input. */
	input = current->input;
	length = 0;

	while (*input++ != 0)
		length++;

	/* skip_left / skip_right trim code units from both ends of the input. */
	length -= current->skip_left + current->skip_right;

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		/* start_offset refers to the untrimmed input, so it is rebased here. */
		result = pcre2_jit_match_16(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_16(code);
			return 0;
		}
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_16(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_16(code);
			return 0;
		}
	}

	pcre2_code_free_16(code);
	return 1;
}
|
|
2283
|
+
|
|
2284
|
+
static int invalid_utf16_regression_tests(void)
|
|
2285
|
+
{
|
|
2286
|
+
const struct invalid_utf16_regression_test_case *current;
|
|
2287
|
+
pcre2_compile_context_16 *ccontext;
|
|
2288
|
+
pcre2_match_data_16 *mdata;
|
|
2289
|
+
int total = 0, successful = 0;
|
|
2290
|
+
int result;
|
|
2291
|
+
|
|
2292
|
+
printf("\nRunning invalid-utf16 JIT regression tests\n");
|
|
2293
|
+
|
|
2294
|
+
ccontext = pcre2_compile_context_create_16(NULL);
|
|
2295
|
+
pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY);
|
|
2296
|
+
mdata = pcre2_match_data_create_16(4, NULL);
|
|
2297
|
+
|
|
2298
|
+
for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
|
|
2299
|
+
/* printf("\nPattern: %s :\n", current->pattern); */
|
|
2300
|
+
total++;
|
|
2301
|
+
|
|
2302
|
+
result = 1;
|
|
2303
|
+
if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
|
|
2304
|
+
result = 0;
|
|
2305
|
+
if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
|
|
2306
|
+
result = 0;
|
|
2307
|
+
|
|
2308
|
+
if (result) {
|
|
2309
|
+
successful++;
|
|
2310
|
+
}
|
|
2311
|
+
|
|
2312
|
+
printf(".");
|
|
2313
|
+
if ((total % 60) == 0)
|
|
2314
|
+
printf("\n");
|
|
2315
|
+
}
|
|
2316
|
+
|
|
2317
|
+
if ((total % 60) != 0)
|
|
2318
|
+
printf("\n");
|
|
2319
|
+
|
|
2320
|
+
pcre2_match_data_free_16(mdata);
|
|
2321
|
+
pcre2_compile_context_free_16(ccontext);
|
|
2322
|
+
|
|
2323
|
+
if (total == successful) {
|
|
2324
|
+
printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
|
|
2325
|
+
return 0;
|
|
2326
|
+
} else {
|
|
2327
|
+
printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
|
2328
|
+
return 1;
|
|
2329
|
+
}
|
|
2330
|
+
}
|
|
2331
|
+
|
|
2332
|
+
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
|
|
2333
|
+
|
|
2334
|
+
/*
 * Stub used when Unicode support or the 16-bit library is not compiled in:
 * there is nothing to run, so success (0) is reported.
 */
static int invalid_utf16_regression_tests(void)
{
	return 0;
}
|
|
2338
|
+
|
|
2339
|
+
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
|
|
2340
|
+
|
|
2341
|
+
#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32
|
|
2342
|
+
|
|
2343
|
+
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
|
|
2344
|
+
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
|
|
2345
|
+
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
|
|
2346
|
+
|
|
2347
|
+
struct invalid_utf32_regression_test_case {
|
|
2348
|
+
uint32_t compile_options;
|
|
2349
|
+
int jit_compile_options;
|
|
2350
|
+
int start_offset;
|
|
2351
|
+
int skip_left;
|
|
2352
|
+
int skip_right;
|
|
2353
|
+
int match_start;
|
|
2354
|
+
int match_end;
|
|
2355
|
+
const PCRE2_UCHAR32 *pattern[2];
|
|
2356
|
+
const PCRE2_UCHAR32 *input;
|
|
2357
|
+
};
|
|
2358
|
+
|
|
2359
|
+
static PCRE2_UCHAR32 allany32[] = { '.', 0 };
|
|
2360
|
+
static PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 };
|
|
2361
|
+
static PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 };
|
|
2362
|
+
static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 };
|
|
2363
|
+
static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 };
|
|
2364
|
+
static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 };
|
|
2365
|
+
static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 };
|
|
2366
|
+
static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 };
|
|
2367
|
+
static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 };
|
|
2368
|
+
static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 };
|
|
2369
|
+
static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 };
|
|
2370
|
+
static PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 };
|
|
2371
|
+
static PCRE2_UCHAR32 test32_6[] = { ' ', 0x110000, 0x2028, '#', 0 };
|
|
2372
|
+
|
|
2373
|
+
static const struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = {
|
|
2374
|
+
{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 },
|
|
2375
|
+
{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 },
|
|
2376
|
+
{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_2 },
|
|
2377
|
+
{ UDA, CI, 1, 0, 0, 1, 2, { allany32, NULL }, test32_2 },
|
|
2378
|
+
{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
|
|
2379
|
+
{ UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
|
|
2380
|
+
|
|
2381
|
+
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
|
|
2382
|
+
{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
|
|
2383
|
+
{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 },
|
|
2384
|
+
{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
|
|
2385
|
+
{ UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
|
|
2386
|
+
|
|
2387
|
+
{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 },
|
|
2388
|
+
{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 },
|
|
2389
|
+
|
|
2390
|
+
{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 },
|
|
2391
|
+
{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 },
|
|
2392
|
+
{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme32, NULL }, test32_2 },
|
|
2393
|
+
{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
|
|
2394
|
+
{ UDA, CPI, 3, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
|
|
2395
|
+
{ UDA, CPI, 4, 0, 0, 4, 5, { grapheme32, NULL }, test32_2 },
|
|
2396
|
+
|
|
2397
|
+
{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
|
|
2398
|
+
{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_4 },
|
|
2399
|
+
{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
|
|
2400
|
+
{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_2 },
|
|
2401
|
+
{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_2 },
|
|
2402
|
+
|
|
2403
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_5 },
|
|
2404
|
+
{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { afternl32, NULL }, test32_6 },
|
|
2405
|
+
|
|
2406
|
+
{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
|
|
2407
|
+
};
|
|
2408
|
+
|
|
2409
|
+
#undef UDA
|
|
2410
|
+
#undef CI
|
|
2411
|
+
#undef CPI
|
|
2412
|
+
|
|
2413
|
+
/*
 * Compiles pattern number `i` of one invalid-UTF-32 test case, JIT-compiles
 * it, and runs it over the case's input, validating each outcome with
 * check_invalid_utf_result().
 *
 *   current        test case: patterns, options, input and expected offsets
 *   pattern_index  index of the test case; used only in diagnostics
 *   i              which entry of current->pattern to exercise
 *   ccontext       compile context handed to pcre2_compile_32()
 *   mdata          match data block reused by every match attempt
 *
 * Returns 1 when the pattern slot is empty (NULL) or every requested match
 * mode passes, and 0 on any compile, JIT-compile or result-check failure.
 */
static int run_invalid_utf32_test(const struct invalid_utf32_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata)
{
	pcre2_code_32 *code;
	int result, errorcode;
	int passed = 0;
	PCRE2_SIZE length, erroroffset;
	const PCRE2_UCHAR32 *input;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata);

	/* An unused pattern slot is not a failure. */
	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Report the actual slot `i` so a failure of the second pattern is not
		   attributed to the first one. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		goto out;
	}

	/* The input is zero-terminated: count its code units. */
	length = 0;
	for (input = current->input; *input != 0; input++)
		length++;

	/* Only the window between the skipped prefix and suffix is matched. */
	length -= current->skip_left + current->skip_right;

	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_32(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		/* A non-zero return from the checker means the result was not the expected one. */
		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector))
			goto out;
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_32(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector))
			goto out;
	}

	passed = 1;

out:
	/* Single release point for the compiled pattern on every path past compilation. */
	pcre2_code_free_32(code);
	return passed;
}
|
|
2470
|
+
|
|
2471
|
+
static int invalid_utf32_regression_tests(void)
|
|
2472
|
+
{
|
|
2473
|
+
const struct invalid_utf32_regression_test_case *current;
|
|
2474
|
+
pcre2_compile_context_32 *ccontext;
|
|
2475
|
+
pcre2_match_data_32 *mdata;
|
|
2476
|
+
int total = 0, successful = 0;
|
|
2477
|
+
int result;
|
|
2478
|
+
|
|
2479
|
+
printf("\nRunning invalid-utf32 JIT regression tests\n");
|
|
2480
|
+
|
|
2481
|
+
ccontext = pcre2_compile_context_create_32(NULL);
|
|
2482
|
+
pcre2_set_newline_32(ccontext, PCRE2_NEWLINE_ANY);
|
|
2483
|
+
mdata = pcre2_match_data_create_32(4, NULL);
|
|
2484
|
+
|
|
2485
|
+
for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) {
|
|
2486
|
+
/* printf("\nPattern: %s :\n", current->pattern); */
|
|
2487
|
+
total++;
|
|
2488
|
+
|
|
2489
|
+
result = 1;
|
|
2490
|
+
if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata))
|
|
2491
|
+
result = 0;
|
|
2492
|
+
if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata))
|
|
2493
|
+
result = 0;
|
|
2494
|
+
|
|
2495
|
+
if (result) {
|
|
2496
|
+
successful++;
|
|
2497
|
+
}
|
|
2498
|
+
|
|
2499
|
+
printf(".");
|
|
2500
|
+
if ((total % 60) == 0)
|
|
2501
|
+
printf("\n");
|
|
2502
|
+
}
|
|
2503
|
+
|
|
2504
|
+
if ((total % 60) != 0)
|
|
2505
|
+
printf("\n");
|
|
2506
|
+
|
|
2507
|
+
pcre2_match_data_free_32(mdata);
|
|
2508
|
+
pcre2_compile_context_free_32(ccontext);
|
|
2509
|
+
|
|
2510
|
+
if (total == successful) {
|
|
2511
|
+
printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n");
|
|
2512
|
+
return 0;
|
|
2513
|
+
} else {
|
|
2514
|
+
printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
|
|
2515
|
+
return 1;
|
|
2516
|
+
}
|
|
2517
|
+
}
|
|
2518
|
+
|
|
2519
|
+
#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */
|
|
2520
|
+
|
|
2521
|
+
/*
 * Stub used in the #else branch of the surrounding conditional (Unicode or
 * 32-bit support not compiled in): no tests are run, so it returns 0, the
 * same value the real implementation returns when nothing failed.
 */
static int invalid_utf32_regression_tests(void)
{
	return 0;
}
|
|
2525
|
+
|
|
2526
|
+
#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */
|
|
2527
|
+
|
|
2528
|
+
/* End of pcre2_jit_test.c */
|