tpy-lang 0.3.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (333) hide show
  1. tpy_lang-0.3.0.dev0.dist-info/METADATA +151 -0
  2. tpy_lang-0.3.0.dev0.dist-info/RECORD +333 -0
  3. tpy_lang-0.3.0.dev0.dist-info/WHEEL +4 -0
  4. tpy_lang-0.3.0.dev0.dist-info/entry_points.txt +3 -0
  5. tpyc/__init__.py +104 -0
  6. tpyc/__main__.py +6 -0
  7. tpyc/_buildinfo.py +1 -0
  8. tpyc/_data/docs/LANGUAGE_FEATURES.md +6278 -0
  9. tpyc/_data/docs/STDLIB_ROADMAP.md +1258 -0
  10. tpyc/_data/docs/TPY_FOR_AGENTS.md +556 -0
  11. tpyc/_data/lib/tpy/_bindings/__init__.py +6 -0
  12. tpyc/_data/lib/tpy/_bindings/pcre2.py +173 -0
  13. tpyc/_data/lib/tpy/_bindings/posix_socket.py +161 -0
  14. tpyc/_data/lib/tpy/_functools_macros.py +80 -0
  15. tpyc/_data/lib/tpy/_macro_helpers.py +161 -0
  16. tpyc/_data/lib/tpy/argparse.py +2062 -0
  17. tpyc/_data/lib/tpy/asyncio/__init__.py +744 -0
  18. tpyc/_data/lib/tpy/asyncio/_executor.py +515 -0
  19. tpyc/_data/lib/tpy/base64.py +410 -0
  20. tpyc/_data/lib/tpy/bisect.py +39 -0
  21. tpyc/_data/lib/tpy/builtins.py +38 -0
  22. tpyc/_data/lib/tpy/dataclasses.py +354 -0
  23. tpyc/_data/lib/tpy/enum.py +23 -0
  24. tpyc/_data/lib/tpy/functools.py +33 -0
  25. tpyc/_data/lib/tpy/hashlib.py +206 -0
  26. tpyc/_data/lib/tpy/heapq.py +118 -0
  27. tpyc/_data/lib/tpy/io.py +395 -0
  28. tpyc/_data/lib/tpy/json.py +221 -0
  29. tpyc/_data/lib/tpy/math.py +406 -0
  30. tpyc/_data/lib/tpy/random.py +597 -0
  31. tpyc/_data/lib/tpy/re.py +467 -0
  32. tpyc/_data/lib/tpy/socket.py +379 -0
  33. tpyc/_data/lib/tpy/struct.py +178 -0
  34. tpyc/_data/lib/tpy/sys.py +40 -0
  35. tpyc/_data/lib/tpy/time.py +39 -0
  36. tpyc/_data/lib/tpy/tpy/__init__.py +78 -0
  37. tpyc/_data/lib/tpy/tpy/_bootstrap/__init__.py +10 -0
  38. tpyc/_data/lib/tpy/tpy/_bootstrap/_decorators.py +37 -0
  39. tpyc/_data/lib/tpy/tpy/_bootstrap/_extern.py +64 -0
  40. tpyc/_data/lib/tpy/tpy/_builtins/__init__.py +11 -0
  41. tpyc/_data/lib/tpy/tpy/_builtins/_bytes.py +378 -0
  42. tpyc/_data/lib/tpy/tpy/_builtins/_dict.py +151 -0
  43. tpyc/_data/lib/tpy/tpy/_builtins/_exceptions.py +125 -0
  44. tpyc/_data/lib/tpy/tpy/_builtins/_funcs.py +681 -0
  45. tpyc/_data/lib/tpy/tpy/_builtins/_io.py +97 -0
  46. tpyc/_data/lib/tpy/tpy/_builtins/_list.py +127 -0
  47. tpyc/_data/lib/tpy/tpy/_builtins/_range.py +52 -0
  48. tpyc/_data/lib/tpy/tpy/_builtins/_set.py +139 -0
  49. tpyc/_data/lib/tpy/tpy/_builtins/_super.py +11 -0
  50. tpyc/_data/lib/tpy/tpy/_builtins/_types.py +661 -0
  51. tpyc/_data/lib/tpy/tpy/_core/__init__.py +23 -0
  52. tpyc/_data/lib/tpy/tpy/_core/_bytes_view.py +129 -0
  53. tpyc/_data/lib/tpy/tpy/_core/_containers.py +137 -0
  54. tpyc/_data/lib/tpy/tpy/_core/_functions.py +40 -0
  55. tpyc/_data/lib/tpy/tpy/_core/_types.py +2061 -0
  56. tpyc/_data/lib/tpy/tpy/_typing/__init__.py +77 -0
  57. tpyc/_data/lib/tpy/tpy/_version.py +29 -0
  58. tpyc/_data/lib/tpy/tpy/bits.py +28 -0
  59. tpyc/_data/lib/tpy/tpy/coro/__init__.py +127 -0
  60. tpyc/_data/lib/tpy/tpy/extern.py +8 -0
  61. tpyc/_data/lib/tpy/tpy/mem.py +49 -0
  62. tpyc/_data/lib/tpy/tpy/unsafe.py +195 -0
  63. tpyc/_data/lib/tpy/tpy/version.py +21 -0
  64. tpyc/_data/lib/tpy/typing.py +13 -0
  65. tpyc/_data/runtime/cpp/include/tpy/any.hpp +461 -0
  66. tpyc/_data/runtime/cpp/include/tpy/as_ostream.hpp +117 -0
  67. tpyc/_data/runtime/cpp/include/tpy/async.hpp +76 -0
  68. tpyc/_data/runtime/cpp/include/tpy/bigint.hpp +1343 -0
  69. tpyc/_data/runtime/cpp/include/tpy/builtins.hpp +400 -0
  70. tpyc/_data/runtime/cpp/include/tpy/bytes_ops.hpp +469 -0
  71. tpyc/_data/runtime/cpp/include/tpy/container_ops.hpp +487 -0
  72. tpyc/_data/runtime/cpp/include/tpy/copy_iter.hpp +82 -0
  73. tpyc/_data/runtime/cpp/include/tpy/core.hpp +558 -0
  74. tpyc/_data/runtime/cpp/include/tpy/dict_ops.hpp +289 -0
  75. tpyc/_data/runtime/cpp/include/tpy/dunder.hpp +750 -0
  76. tpyc/_data/runtime/cpp/include/tpy/dynamic.hpp +44 -0
  77. tpyc/_data/runtime/cpp/include/tpy/enum.hpp +40 -0
  78. tpyc/_data/runtime/cpp/include/tpy/file.hpp +245 -0
  79. tpyc/_data/runtime/cpp/include/tpy/fixed_int.hpp +317 -0
  80. tpyc/_data/runtime/cpp/include/tpy/format.hpp +954 -0
  81. tpyc/_data/runtime/cpp/include/tpy/frame_slot.hpp +120 -0
  82. tpyc/_data/runtime/cpp/include/tpy/generator.hpp +47 -0
  83. tpyc/_data/runtime/cpp/include/tpy/iterable_ops.hpp +122 -0
  84. tpyc/_data/runtime/cpp/include/tpy/itertools.hpp +749 -0
  85. tpyc/_data/runtime/cpp/include/tpy/next_iter.hpp +82 -0
  86. tpyc/_data/runtime/cpp/include/tpy/ordered_map.hpp +518 -0
  87. tpyc/_data/runtime/cpp/include/tpy/ordered_set.hpp +337 -0
  88. tpyc/_data/runtime/cpp/include/tpy/own_iter.hpp +54 -0
  89. tpyc/_data/runtime/cpp/include/tpy/pascal_graph_sdl.hpp +192 -0
  90. tpyc/_data/runtime/cpp/include/tpy/printing.hpp +302 -0
  91. tpyc/_data/runtime/cpp/include/tpy/protocols.hpp +61 -0
  92. tpyc/_data/runtime/cpp/include/tpy/range.hpp +115 -0
  93. tpyc/_data/runtime/cpp/include/tpy/ranges.hpp +212 -0
  94. tpyc/_data/runtime/cpp/include/tpy/set_ops.hpp +265 -0
  95. tpyc/_data/runtime/cpp/include/tpy/slice.hpp +47 -0
  96. tpyc/_data/runtime/cpp/include/tpy/span_iter.hpp +42 -0
  97. tpyc/_data/runtime/cpp/include/tpy/stdlib/math.hpp +41 -0
  98. tpyc/_data/runtime/cpp/include/tpy/stdlib/pcre2_h.hpp +96 -0
  99. tpyc/_data/runtime/cpp/include/tpy/stdlib/random.hpp +25 -0
  100. tpyc/_data/runtime/cpp/include/tpy/stdlib/socket_h.hpp +145 -0
  101. tpyc/_data/runtime/cpp/include/tpy/stdlib/time.hpp +62 -0
  102. tpyc/_data/runtime/cpp/include/tpy/system.hpp +121 -0
  103. tpyc/_data/runtime/cpp/include/tpy/throwable.hpp +55 -0
  104. tpyc/_data/runtime/cpp/include/tpy/tpy.hpp +156 -0
  105. tpyc/_data/runtime/cpp/include/tpy/type_name.hpp +77 -0
  106. tpyc/_data/runtime/cpp/include/tpy/type_traits.hpp +240 -0
  107. tpyc/_data/runtime/cpp/include/tpy/uninit_array_storage.hpp +250 -0
  108. tpyc/_data/runtime/cpp/include/tpy/uninit_heap_storage.hpp +277 -0
  109. tpyc/_data/runtime/cpp/include/tpy/varargs.hpp +174 -0
  110. tpyc/_data/runtime/cpp/include/tpy/variant_ref.hpp +118 -0
  111. tpyc/_data/runtime/cpp/src/stdlib/socket_impl.cpp +104 -0
  112. tpyc/_data/runtime/cpp/third_party/README.md +58 -0
  113. tpyc/_data/runtime/cpp/third_party/pcre2/AUTHORS +36 -0
  114. tpyc/_data/runtime/cpp/third_party/pcre2/CMakeLists.txt +1233 -0
  115. tpyc/_data/runtime/cpp/third_party/pcre2/COPYING +5 -0
  116. tpyc/_data/runtime/cpp/third_party/pcre2/ChangeLog +3097 -0
  117. tpyc/_data/runtime/cpp/third_party/pcre2/HACKING +853 -0
  118. tpyc/_data/runtime/cpp/third_party/pcre2/INSTALL +368 -0
  119. tpyc/_data/runtime/cpp/third_party/pcre2/LICENCE +94 -0
  120. tpyc/_data/runtime/cpp/third_party/pcre2/NEWS +492 -0
  121. tpyc/_data/runtime/cpp/third_party/pcre2/NON-AUTOTOOLS-BUILD +430 -0
  122. tpyc/_data/runtime/cpp/third_party/pcre2/README +956 -0
  123. tpyc/_data/runtime/cpp/third_party/pcre2/cmake/COPYING-CMAKE-SCRIPTS +22 -0
  124. tpyc/_data/runtime/cpp/third_party/pcre2/cmake/FindEditline.cmake +16 -0
  125. tpyc/_data/runtime/cpp/third_party/pcre2/cmake/FindPackageHandleStandardArgs.cmake +58 -0
  126. tpyc/_data/runtime/cpp/third_party/pcre2/cmake/FindReadline.cmake +29 -0
  127. tpyc/_data/runtime/cpp/third_party/pcre2/cmake/pcre2-config-version.cmake.in +15 -0
  128. tpyc/_data/runtime/cpp/third_party/pcre2/cmake/pcre2-config.cmake.in +148 -0
  129. tpyc/_data/runtime/cpp/third_party/pcre2/config-cmake.h.in +56 -0
  130. tpyc/_data/runtime/cpp/third_party/pcre2/libpcre2-16.pc.in +13 -0
  131. tpyc/_data/runtime/cpp/third_party/pcre2/libpcre2-32.pc.in +13 -0
  132. tpyc/_data/runtime/cpp/third_party/pcre2/libpcre2-8.pc.in +13 -0
  133. tpyc/_data/runtime/cpp/third_party/pcre2/libpcre2-posix.pc.in +13 -0
  134. tpyc/_data/runtime/cpp/third_party/pcre2/pcre2-config.in +121 -0
  135. tpyc/_data/runtime/cpp/third_party/pcre2/src/config.h +483 -0
  136. tpyc/_data/runtime/cpp/third_party/pcre2/src/config.h.generic +483 -0
  137. tpyc/_data/runtime/cpp/third_party/pcre2/src/config.h.in +460 -0
  138. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2.h +1010 -0
  139. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2.h.generic +1010 -0
  140. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2.h.in +1010 -0
  141. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_auto_possess.c +1371 -0
  142. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_chartables.c +196 -0
  143. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_chartables.c.dist +196 -0
  144. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_chkdint.c +96 -0
  145. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_compile.c +11001 -0
  146. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_config.c +252 -0
  147. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_context.c +510 -0
  148. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_convert.c +1189 -0
  149. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_dfa_match.c +4119 -0
  150. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_dftables.c +297 -0
  151. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_error.c +345 -0
  152. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_extuni.c +162 -0
  153. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_find_bracket.c +219 -0
  154. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_fuzzsupport.c +792 -0
  155. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_internal.h +2084 -0
  156. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_intmodedep.h +940 -0
  157. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_compile.c +14972 -0
  158. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_match.c +200 -0
  159. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_misc.c +234 -0
  160. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_neon_inc.h +354 -0
  161. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_simd_inc.h +2355 -0
  162. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_test.c +2528 -0
  163. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_maketables.c +165 -0
  164. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_match.c +7777 -0
  165. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_match_data.c +185 -0
  166. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_newline.c +243 -0
  167. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_ord2utf.c +120 -0
  168. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_pattern_info.c +432 -0
  169. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_printint.c +886 -0
  170. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_script_run.c +344 -0
  171. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_serialize.c +286 -0
  172. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_string_utils.c +237 -0
  173. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_study.c +1915 -0
  174. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_substitute.c +1009 -0
  175. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_substring.c +550 -0
  176. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_tables.c +234 -0
  177. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_ucd.c +5460 -0
  178. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_ucp.h +396 -0
  179. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_ucptables.c +1533 -0
  180. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_valid_utf.c +398 -0
  181. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_xclass.c +308 -0
  182. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2demo.c +497 -0
  183. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2grep.c +4606 -0
  184. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2posix.c +425 -0
  185. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2posix.h +187 -0
  186. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2posix_test.c +209 -0
  187. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2test.c +9708 -0
  188. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitExecAllocatorApple.c +137 -0
  189. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitExecAllocatorCore.c +327 -0
  190. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c +89 -0
  191. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitExecAllocatorPosix.c +62 -0
  192. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitExecAllocatorWindows.c +40 -0
  193. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c +72 -0
  194. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorPosix.c +172 -0
  195. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorPosix.c +141 -0
  196. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorWindows.c +102 -0
  197. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitConfig.h +142 -0
  198. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitConfigCPU.h +188 -0
  199. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitConfigInternal.h +907 -0
  200. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitLir.c +3561 -0
  201. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitLir.h +2466 -0
  202. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeARM_32.c +4636 -0
  203. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeARM_64.c +3491 -0
  204. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeARM_T2_32.c +4302 -0
  205. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeLOONGARCH_64.c +3765 -0
  206. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeMIPS_32.c +472 -0
  207. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeMIPS_64.c +387 -0
  208. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeMIPS_common.c +4259 -0
  209. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativePPC_32.c +485 -0
  210. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativePPC_64.c +719 -0
  211. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativePPC_common.c +3161 -0
  212. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeRISCV_32.c +142 -0
  213. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeRISCV_64.c +222 -0
  214. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeRISCV_common.c +3121 -0
  215. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeS390X.c +4526 -0
  216. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeX86_32.c +1685 -0
  217. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeX86_64.c +1398 -0
  218. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeX86_common.c +5001 -0
  219. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitSerialize.c +516 -0
  220. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitUtils.c +344 -0
  221. tpyc/_data/runtime/cpp/third_party/pcre2.sources.txt +54 -0
  222. tpyc/_data/runtime/cpp/third_party/pcre2.vendor.json +7 -0
  223. tpyc/build/__init__.py +7 -0
  224. tpyc/build/pcre2.py +122 -0
  225. tpyc/build/third_party.py +413 -0
  226. tpyc/cli.py +822 -0
  227. tpyc/codegen_cpp/__init__.py +18 -0
  228. tpyc/codegen_cpp/builtins.py +484 -0
  229. tpyc/codegen_cpp/context.py +2064 -0
  230. tpyc/codegen_cpp/expressions.py +5940 -0
  231. tpyc/codegen_cpp/functions.py +1913 -0
  232. tpyc/codegen_cpp/gen_async.py +3258 -0
  233. tpyc/codegen_cpp/gen_generators.py +657 -0
  234. tpyc/codegen_cpp/generator.py +2258 -0
  235. tpyc/codegen_cpp/match.py +1997 -0
  236. tpyc/codegen_cpp/param_const.py +172 -0
  237. tpyc/codegen_cpp/protocols.py +907 -0
  238. tpyc/codegen_cpp/records.py +1654 -0
  239. tpyc/codegen_cpp/resumable_cfg.py +1651 -0
  240. tpyc/codegen_cpp/statements.py +4963 -0
  241. tpyc/codegen_cpp/string_dispatch.py +76 -0
  242. tpyc/codegen_cpp/test_context.py +46 -0
  243. tpyc/codegen_cpp/test_param_const.py +113 -0
  244. tpyc/codegen_cpp/test_resumable_cfg.py +182 -0
  245. tpyc/codegen_cpp/type_resolution.py +53 -0
  246. tpyc/codegen_cpp/types.py +436 -0
  247. tpyc/codegen_cpp/variant_access.py +135 -0
  248. tpyc/coercions.py +749 -0
  249. tpyc/compilation_context.py +57 -0
  250. tpyc/compiler.py +3945 -0
  251. tpyc/cycle_detection.py +358 -0
  252. tpyc/diagnostics.py +135 -0
  253. tpyc/dump_types.py +353 -0
  254. tpyc/frontend_diagnostics.py +47 -0
  255. tpyc/frontend_ir/__init__.py +140 -0
  256. tpyc/frontend_ir/lower.py +1098 -0
  257. tpyc/frontend_ir/nodes.py +718 -0
  258. tpyc/frontend_ir/resolver_adapter.py +151 -0
  259. tpyc/frontend_plugin.py +209 -0
  260. tpyc/install_docs.py +81 -0
  261. tpyc/liveness.py +756 -0
  262. tpyc/macro_api.py +1724 -0
  263. tpyc/macro_loader.py +497 -0
  264. tpyc/module_names.py +64 -0
  265. tpyc/modules/__init__.py +31 -0
  266. tpyc/modules/defs.py +89 -0
  267. tpyc/modules/registry.py +36 -0
  268. tpyc/modules/resolver.py +192 -0
  269. tpyc/modules/type_resolution.py +629 -0
  270. tpyc/namespace.py +172 -0
  271. tpyc/parse/__init__.py +84 -0
  272. tpyc/parse/imports.py +490 -0
  273. tpyc/parse/nodes.py +1732 -0
  274. tpyc/parse/parser.py +4043 -0
  275. tpyc/parse/resolve_refs.py +466 -0
  276. tpyc/parse/type_resolver.py +1060 -0
  277. tpyc/prescan.py +254 -0
  278. tpyc/qnames.py +149 -0
  279. tpyc/repl.py +529 -0
  280. tpyc/repl_backends.py +848 -0
  281. tpyc/sema/__init__.py +21 -0
  282. tpyc/sema/analyzer.py +3625 -0
  283. tpyc/sema/bound_check.py +72 -0
  284. tpyc/sema/builder_trace.py +684 -0
  285. tpyc/sema/calls.py +5406 -0
  286. tpyc/sema/compatibility.py +2107 -0
  287. tpyc/sema/context.py +1243 -0
  288. tpyc/sema/expressions.py +3737 -0
  289. tpyc/sema/flow_facts.py +199 -0
  290. tpyc/sema/init_tracker.py +150 -0
  291. tpyc/sema/list_literals.py +69 -0
  292. tpyc/sema/literal_utils.py +27 -0
  293. tpyc/sema/local_deduction.py +1088 -0
  294. tpyc/sema/macros.py +179 -0
  295. tpyc/sema/match.py +1177 -0
  296. tpyc/sema/method_expansion.py +347 -0
  297. tpyc/sema/methods.py +2197 -0
  298. tpyc/sema/mutation_propagation.py +268 -0
  299. tpyc/sema/narrowing.py +857 -0
  300. tpyc/sema/numeric_lattice.py +160 -0
  301. tpyc/sema/operators.py +402 -0
  302. tpyc/sema/overloads.py +841 -0
  303. tpyc/sema/protocols.py +1209 -0
  304. tpyc/sema/reach_analysis.py +202 -0
  305. tpyc/sema/registration.py +3156 -0
  306. tpyc/sema/scope_tracker.py +193 -0
  307. tpyc/sema/statements.py +4426 -0
  308. tpyc/sema/type_ops.py +1879 -0
  309. tpyc/sema/value_range.py +181 -0
  310. tpyc/symbol_binding.py +259 -0
  311. tpyc/test_c3_mro.py +208 -0
  312. tpyc/test_cli_argv.py +52 -0
  313. tpyc/test_compiler.py +559 -0
  314. tpyc/test_contains_type_param.py +101 -0
  315. tpyc/test_cycle_detection.py +221 -0
  316. tpyc/test_dump_types.py +225 -0
  317. tpyc/test_install_docs.py +65 -0
  318. tpyc/test_local_cpp_form.py +135 -0
  319. tpyc/test_macro_loader.py +76 -0
  320. tpyc/test_method_expansion.py +254 -0
  321. tpyc/test_nominal_identity.py +182 -0
  322. tpyc/test_overloads.py +410 -0
  323. tpyc/test_parse.py +303 -0
  324. tpyc/test_parse_type_ref.py +506 -0
  325. tpyc/test_parse_version_info.py +58 -0
  326. tpyc/test_reach_analysis.py +72 -0
  327. tpyc/test_ref_type.py +216 -0
  328. tpyc/test_send_sync_substitution.py +276 -0
  329. tpyc/test_tuple_mutation_propagation.py +206 -0
  330. tpyc/test_type_def_registry.py +1729 -0
  331. tpyc/test_union_types.py +195 -0
  332. tpyc/type_def_registry.py +975 -0
  333. tpyc/typesys.py +5104 -0
@@ -0,0 +1,2528 @@
1
+ /*************************************************
2
+ * Perl-Compatible Regular Expressions *
3
+ *************************************************/
4
+
5
+ /* PCRE is a library of functions to support regular expressions whose syntax
6
+ and semantics are as close as possible to those of the Perl 5 language.
7
+
8
+ Written by Philip Hazel
9
+ Original API code Copyright (c) 1997-2012 University of Cambridge
10
+ New API code Copyright (c) 2016 University of Cambridge
11
+
12
+ -----------------------------------------------------------------------------
13
+ Redistribution and use in source and binary forms, with or without
14
+ modification, are permitted provided that the following conditions are met:
15
+
16
+ * Redistributions of source code must retain the above copyright notice,
17
+ this list of conditions and the following disclaimer.
18
+
19
+ * Redistributions in binary form must reproduce the above copyright
20
+ notice, this list of conditions and the following disclaimer in the
21
+ documentation and/or other materials provided with the distribution.
22
+
23
+ * Neither the name of the University of Cambridge nor the names of its
24
+ contributors may be used to endorse or promote products derived from
25
+ this software without specific prior written permission.
26
+
27
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
+ POSSIBILITY OF SUCH DAMAGE.
38
+ -----------------------------------------------------------------------------
39
+ */
40
+
41
+ #ifdef HAVE_CONFIG_H
42
+ #include "config.h"
43
+ #endif
44
+
45
+ #include <stdio.h>
46
+ #include <string.h>
47
+
48
+ #define PCRE2_CODE_UNIT_WIDTH 0
49
+ #include "pcre2.h"
50
+
51
+ /*
52
+ Letter characters:
53
+ \xe6\x92\xad = 0x64ad = 25773 (kanji)
54
+ Non-letter characters:
55
+ \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
56
+ \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
57
+ \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
58
+ \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
59
+ Newlines:
60
+ \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
61
+ \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
62
+ Othercase pairs:
63
+ \xc3\xa9 = 0xe9 = 233 (e')
64
+ \xc3\x89 = 0xc9 = 201 (E')
65
+ \xc3\xa1 = 0xe1 = 225 (a')
66
+ \xc3\x81 = 0xc1 = 193 (A')
67
+ \x53 = 0x53 = S
68
+ \x73 = 0x73 = s
69
+ \xc5\xbf = 0x17f = 383 (long S)
70
+ \xc8\xba = 0x23a = 570
71
+ \xe2\xb1\xa5 = 0x2c65 = 11365
72
+ \xe1\xbd\xb8 = 0x1f78 = 8056
73
+ \xe1\xbf\xb8 = 0x1ff8 = 8184
74
+ \xf0\x90\x90\x80 = 0x10400 = 66560
75
+ \xf0\x90\x90\xa8 = 0x10428 = 66600
76
+ \xc7\x84 = 0x1c4 = 452
77
+ \xc7\x85 = 0x1c5 = 453
78
+ \xc7\x86 = 0x1c6 = 454
79
+ Caseless sets:
80
+ ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
81
+ ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
82
+ ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
83
+
84
+ Mark property:
85
+ \xcc\x8d = 0x30d = 781
86
+ Special:
87
+ \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
88
+ \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
89
+ \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
90
+ \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
91
+ \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
92
+ \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
93
+ */
94
+
95
+ static int regression_tests(void); /* Forward declarations of the test suites called from main(); the definitions are not part of this excerpt. */
96
+ static int invalid_utf8_regression_tests(void); /* Each suite returns an int that main() ORs into the process exit status. */
97
+ static int invalid_utf16_regression_tests(void);
98
+ static int invalid_utf32_regression_tests(void);
99
+
100
+ int main(void) /* Test driver entry point: returns 1 when JIT is unavailable, otherwise the bitwise OR of the four suite results. */
101
+ {
102
+ int jit = 0; /* Stays 0 unless one of the pcre2_config_* calls below overwrites it. NOTE(review): the PCRE2 API documentation describes the PCRE2_CONFIG_JIT output as a uint32_t, while this is an int - confirm the widths match on every target. */
103
+ #if defined SUPPORT_PCRE2_8
104
+ pcre2_config_8(PCRE2_CONFIG_JIT, &jit); /* Only the first enabled code unit width (8, then 16, then 32) is queried. */
105
+ #elif defined SUPPORT_PCRE2_16
106
+ pcre2_config_16(PCRE2_CONFIG_JIT, &jit);
107
+ #elif defined SUPPORT_PCRE2_32
108
+ pcre2_config_32(PCRE2_CONFIG_JIT, &jit);
109
+ #endif
110
+ if (!jit) { /* Nothing to test without JIT: report and exit with status 1. */
111
+ printf("JIT must be enabled to run pcre2_jit_test\n");
112
+ return 1;
113
+ }
114
+ return regression_tests() /* '|' does not short-circuit, so all four suites are always called; the result is nonzero if any suite returned nonzero. */
115
+ | invalid_utf8_regression_tests()
116
+ | invalid_utf16_regression_tests()
117
+ | invalid_utf32_regression_tests();
118
+ }
119
+
120
+ /* --------------------------------------------------------------------------------------- */
121
+
122
+ #if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32)
123
+ #error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
124
+ #endif /* The build is rejected unless at least one code unit width is enabled. */
125
+
126
+ #define MU (PCRE2_MULTILINE | PCRE2_UTF) /* Short names for the compile option combinations used in the first column of the test table. */
127
+ #define MUP (PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
128
+ #define CMU (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF)
129
+ #define CMUP (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
130
+ #define M (PCRE2_MULTILINE)
131
+ #define MP (PCRE2_MULTILINE | PCRE2_UCP)
132
+ #define U (PCRE2_UTF)
133
+ #define CM (PCRE2_CASELESS | PCRE2_MULTILINE)
134
+
135
+ #define BSR(x) ((x) << 16) /* Moves x into bits 16 and above; GET_BSR() below undoes the shift. Presumably used to pack a \R (BSR) setting next to the newline type - confirm at the use sites. */
136
+ #define A PCRE2_NEWLINE_ANYCRLF /* Shorthand used in the newline column of the test table. */
137
+
138
+ #define GET_NEWLINE(x) ((x) & 0xffff) /* Keeps the low 16 bits of a packed value. */
139
+ #define GET_BSR(x) ((x) >> 16) /* Keeps the bits above the low 16. */
140
+
141
+ #define OFFSET_MASK 0x00ffff /* Covers the low 16 bits, below every F_* flag; presumably extracts the start offset from the start_offset field - confirm at the use sites. */
142
+ #define F_NO8 0x010000 /* F_* values are single-bit flags; the table ORs them into the start_offset field (e.g. "0 | F_NOMATCH"). */
143
+ #define F_NO16 0x020000
144
+ #define F_NO32 0x020000 /* NOTE(review): same bit as F_NO16, so the two flags cannot be told apart - confirm this is intentional. */
145
+ #define F_NOMATCH 0x040000 /* Presumably marks cases where no match is expected (e.g. "^ab" against "aab") - confirm in the test runner. */
146
+ #define F_DIFF 0x080000
147
+ #define F_FORCECONV 0x100000
148
+ #define F_PROPERTY 0x200000
149
+
150
+ struct regression_test_case { /* One row of the regression test table: options, pattern and subject string. */
151
+ uint32_t compile_options; /* PCRE2 compile option bits, e.g. MU, CMU or PCRE2_CASELESS in the table rows. */
152
+ int newline; /* Newline convention, e.g. A (PCRE2_NEWLINE_ANYCRLF), PCRE2_NEWLINE_CRLF or 0; presumably may also carry a BSR() value in the upper bits - confirm at the use sites. */
153
+ int match_options; /* Match-time option bits, e.g. 0, PCRE2_NOTBOL or PCRE2_NOTEOL in the table rows. */
154
+ int start_offset; /* Start offset combined with F_* flag bits, e.g. "1 | F_NOMATCH" in the table rows. */
155
+ const char *pattern; /* Regular expression source as a C string. */
156
+ const char *input; /* Subject string the pattern is run against. */
157
+ };
158
+
159
+ static struct regression_test_case regression_test_cases[] = {
160
+ /* Constant strings. */
161
+ { MU, A, 0, 0, "AbC", "AbAbC" },
162
+ { MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
163
+ { CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
164
+ { M, A, 0, 0, "[^a]", "aAbB" },
165
+ { CM, A, 0, 0, "[^m]", "mMnN" },
166
+ { M, A, 0, 0, "a[^b][^#]", "abacd" },
167
+ { CM, A, 0, 0, "A[^B][^E]", "abacd" },
168
+ { CMU, A, 0, 0, "[^x][^#]", "XxBll" },
169
+ { MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" },
170
+ { CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" },
171
+ { MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" },
172
+ { MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" },
173
+ { MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" },
174
+ { MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" },
175
+ { MU, A, 0, 0, "[axd]", "sAXd" },
176
+ { CMU, A, 0, 0, "[axd]", "sAXd" },
177
+ { CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" },
178
+ { MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
179
+ { MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
180
+ { CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
181
+ { MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
182
+ { MU, A, 0, 0, "[^a]", "\xc2\x80[]" },
183
+ { CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
184
+ { CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
185
+ { PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
186
+ { PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
187
+ { PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" },
188
+ #ifndef NEVER_BACKSLASH_C
189
+ { M, A, 0, 0, "\\Ca", "cda" },
190
+ { CM, A, 0, 0, "\\Ca", "CDA" },
191
+ { M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
192
+ { CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
193
+ #endif /* !NEVER_BACKSLASH_C */
194
+ { CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
195
+ { CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
196
+ { CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
197
+ { CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
198
+ { M, A, 0, 0, "[3-57-9]", "5" },
199
+ { PCRE2_AUTO_CALLOUT, A, 0, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
200
+ "12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
201
+ { 0, A, 0, 0, "..a.......b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
202
+ { 0, A, 0, 0, "..a.....b", "bbbbbbbbbbbbbbbbbbbbbabbbbbbbb" },
203
+
204
+ /* Assertions. */
205
+ { MU, A, 0, 0, "\\b[^A]", "A_B#" },
206
+ { M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" },
207
+ { MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" },
208
+ { MP, A, 0, 0, "\\B", "_\xa1" },
209
+ { MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," },
210
+ { MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" },
211
+ { MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
212
+ { MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
213
+ { MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
214
+ { MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
215
+ { CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" },
216
+ { M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" },
217
+ { M, A, 0, 1 | F_NOMATCH, "^", "\n" },
218
+ { 0, 0, 0, 0, "^ab", "ab" },
219
+ { 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" },
220
+ { M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" },
221
+ { MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
222
+ { M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" },
223
+ { MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" },
224
+ { MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" },
225
+ { 0, 0, 0, 0, "ab$", "ab" },
226
+ { 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
227
+ { PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" },
228
+ { M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" },
229
+ { M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" },
230
+ { MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" },
231
+ { MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" },
232
+ { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" },
233
+ { M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" },
234
+ { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" },
235
+ { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" },
236
+ { U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
237
+ { M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" },
238
+ { 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" },
239
+ { U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" },
240
+ { 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" },
241
+ { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" },
242
+ { U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" },
243
+ { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" },
244
+ { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
245
+ { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
246
+ { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
247
+ { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
248
+ { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
249
+ { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
250
+ { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
251
+ { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
252
+ { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
253
+ { U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" },
254
+ { U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
255
+ { M, A, 0, 0, "\\Aa", "aaa" },
256
+ { M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" },
257
+ { M, A, 0, 1, "\\Ga", "aaa" },
258
+ { M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" },
259
+ { M, A, 0, 0, "a\\z", "aaa" },
260
+ { M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
261
+
262
+ /* Brackets and alternatives. */
263
+ { MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
264
+ { MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
265
+ { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
266
+ { CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
267
+ { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
268
+ { MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
269
+ { MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
270
+ { MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
271
+ { MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
272
+ { MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
273
+ { U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
274
+ { U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
275
+ { CM, A, 0, 0, "ab|cd", "CD" },
276
+ { CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
277
+ { CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
278
+ { 0, A, 0, 0, "(a|)b*+a", "a" },
279
+ { 0, A, 0, 0 | F_NOMATCH, "(.|.|.|.|.)(|.|.|.|.)(.||.|.|.)(.|.||.|.)(.|.|.||.)(.|.|.|.|)(A|.|.|.|.)(.|A|.|.|.)(.|.|A|.|.)(.|.|.|A|.)(.|.|.|.|A)(B|.|.|.|.)(.|B|.|.|.)(.|.|B|.|.)(.|.|.|B|.)(.|.|.|.|B)xa", "1234567890123456ax" },
280
+
281
+ /* Greedy and non-greedy ? operators. */
282
+ { MU, A, 0, 0, "(?:a)?a", "laab" },
283
+ { CMU, A, 0, 0, "(A)?A", "llaab" },
284
+ { MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
285
+ { MU, A, 0, 0, "(a)?a", "manm" },
286
+ { CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
287
+ { MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
288
+ { MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
289
+
290
+ /* Greedy and non-greedy + operators */
291
+ { MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
292
+ { MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" },
293
+ { MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" },
294
+ { MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" },
295
+ { MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
296
+ { MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
297
+ { MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
298
+ { MU, A, 0, 0, "(aa|bb){8,1000}", "abaabbaabbaabbaab_aabbaabbaabbaabbaabbaabb_" },
299
+
300
+ /* Greedy and non-greedy * operators */
301
+ { CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
302
+ { MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
303
+ { MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" },
304
+ { CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" },
305
+ { MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
306
+ { MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
307
+ { M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
308
+ { M, A, 0, 0, "((?:a|)*){0}a", "a" },
309
+
310
+ /* Combining ? + * operators */
311
+ { MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
312
+ { MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
313
+ { MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
314
+ { MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
315
+ { MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
316
+
317
+ /* Single character iterators. */
318
+ { MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
319
+ { MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
320
+ { MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
321
+ { MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
322
+ { MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
323
+ { MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
324
+ { MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
325
+ { MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
326
+ { MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" },
327
+ { MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
328
+ { MU, A, 0, 0, "(a?+[^b])+", "babaacacb" },
329
+ { MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
330
+ { CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
331
+ { CMU, A, 0, 0, "[c-f]+k", "DemmFke" },
332
+ { MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
333
+ { MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
334
+ { CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
335
+ { CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
336
+ { CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" },
337
+ { CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
338
+ { MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
339
+ { CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
340
+ { MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
341
+ { MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
342
+ { MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" },
343
+ { MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
344
+ { CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
345
+ { CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
346
+ { CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
347
+ { CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
348
+ { MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
349
+ { MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
350
+ { MU, A, 0, 0, "\\d+123", "987654321,01234" },
351
+ { MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
352
+ { MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
353
+ { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
354
+ { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
355
+ { MU, A, 0, 0, ".[ab]*.", "xx" },
356
+ { MU, A, 0, 0, ".[ab]*a", "xxa" },
357
+ { MU, A, 0, 0, ".[ab]?.", "xx" },
358
+ { MU, A, 0, 0, "_[ab]+_*a", "_aa" },
359
+ { MU, A, 0, 0, "#(A+)#\\d+", "#A#A#0" },
360
+ { MU, A, 0, 0, "(?P<size>\\d+)m|M", "4M" },
361
+ { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\n?.+#", "\n,\n,#" },
362
+ { 0, A, 0, 0, "<(\\w+)[\\s\\w]+id>", "<br><div id>" },
363
+
364
+ /* Bracket repeats with limit. */
365
+ { MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
366
+ { MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
367
+ { MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
368
+ { MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
369
+ { MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
370
+ { MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
371
+ { MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
372
+ { MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
373
+ { MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" },
374
+
375
+ /* Basic character sets. */
376
+ { MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
377
+ { MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
378
+ { MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
379
+ { MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
380
+ { MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
381
+ { MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
382
+ { MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" },
383
+ { MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
384
+ { MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" },
385
+ { MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" },
386
+ { MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
387
+ { MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
388
+ { CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
389
+ { CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
390
+ { MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
391
+ { MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
392
+ { MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
393
+ { MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
394
+ { MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
395
+ { MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
396
+ { MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
397
+ { MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
398
+ { CMU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "^[\\x{100}-\\x{17f}]", " " },
399
+ { M, A, 0, 0 | F_NOMATCH, "[^\\S\\W]{6}", "abcdefghijk" },
400
+
401
+ /* Unicode properties. */
402
+ { MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
403
+ { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
404
+ { MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
405
+ { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
406
+ { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
407
+ { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
408
+ { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
409
+ { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
410
+ { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
411
+ { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
412
+ { MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
413
+ { MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
414
+ { CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
415
+ { MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
416
+ { MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
417
+ { MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
418
+ { CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
419
+ { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
420
+ { MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
421
+ { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB baaa" },
422
+ { MUP, 0, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Hangul}\\p{Z}]", " " },
423
+ { MUP, 0, 0, 0, "[\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
424
+ { MUP, 0, 0, 0, "[\\x{a92e}\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
425
+ { CMUP, 0, 0, 0, "[^S]\\B", "\xe2\x80\x8a" },
426
+ { MUP, 0, 0, 0 | F_NOMATCH, "[^[:print:]\\x{f6f6}]", "\xef\x9b\xb6" },
427
+ { MUP, 0, 0, 0, "[[:xdigit:]\\x{6500}]#", "\xe6\x94\x80#" },
428
+ { MUP, 0, 0, 0 | F_PROPERTY, "[\\pC\\PC]#", "A#" },
429
+
430
+ /* Possibly empty brackets. */
431
+ { MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
432
+ { MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" },
433
+ { MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
434
+ { MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" },
435
+ { MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
436
+ { MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
437
+ { MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
438
+ { MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
439
+ { MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
440
+ { MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
441
+
442
+ /* Start offset. */
443
+ { MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
444
+ { MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
445
+ { MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
446
+ { MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" },
447
+
448
+ /* Newline. */
449
+ { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
450
+ { M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
451
+ { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." },
452
+ { MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" },
453
+ { MU, A, 0, 1, "^", "\r\n" },
454
+ { M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" },
455
+ { M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" },
456
+
457
+ /* Any character except newline or any newline. */
458
+ { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
459
+ { U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
460
+ { 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
461
+ { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
462
+ { U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
463
+ { U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
464
+ { 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
465
+ { U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
466
+ { 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" },
467
+ { 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" },
468
+ { U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" },
469
+ { U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" },
470
+ { U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
471
+ { MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" },
472
+ { MU, A, 0, 0, "\\R+", "ab\r\n\r" },
473
+ { MU, A, 0, 0, "\\R*", "ab\r\n\r" },
474
+ { MU, A, 0, 0, "\\R*", "\r\n\r" },
475
+ { MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" },
476
+ { MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
477
+ { MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
478
+ { MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
479
+ { MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
480
+ { MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" },
481
+ { MU, A, 0, 0, "\\R*\\R\\R", "\n\r" },
482
+ { MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
483
+ { MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
484
+
485
+ /* Atomic groups (no fallback from "next" direction). */
486
+ { MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
487
+ { MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
488
+ { MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
489
+ "bababcdedefgheijijklmlmnop" },
490
+ { MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
491
+ { MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
492
+ { MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
493
+ { MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
494
+ { MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" },
495
+ { MU, A, 0, 0, "(?>x|)*$", "aaa" },
496
+ { MU, A, 0, 0, "(?>(x)|)*$", "aaa" },
497
+ { MU, A, 0, 0, "(?>x|())*$", "aaa" },
498
+ { MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
499
+ { MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
500
+ { MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
501
+ { MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
502
+ { MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
503
+ { MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
504
+ { MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
505
+ { MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
506
+ { MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
507
+ { MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
508
+ { MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
509
+ { MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
510
+ { MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
511
+ { MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
512
+ { CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
513
+ { MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
514
+ { MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
515
+ { MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
516
+ { MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
517
+ { MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
518
+ { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
519
+ { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
520
+ { MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
521
+ { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
522
+ { MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" },
523
+ { MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
524
+ { MU, A, 0, 0 | F_NOMATCH, "(?>a*|)a", "aaa" },
525
+
526
+ /* Possessive quantifiers. */
527
+ { MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" },
528
+ { MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
529
+ { MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
530
+ { MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" },
531
+ { MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" },
532
+ { MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" },
533
+ { MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" },
534
+ { MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" },
535
+ { MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
536
+ { MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" },
537
+ { MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
538
+ { MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" },
539
+ { MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" },
540
+ { MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" },
541
+ { MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" },
542
+ { MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
543
+ { MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
544
+ { MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
545
+ { MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" },
546
+ { MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" },
547
+ { MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
548
+ { MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
549
+ { MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" },
550
+ { MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
551
+ { MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
552
+ { MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
553
+ { MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" },
554
+ { MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" },
555
+ { MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" },
556
+ { MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" },
557
+ { MU, A, 0, 0, "(A)*+$", "ABC" },
558
+ { MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
559
+ { MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
560
+ { MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
561
+ { MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
562
+ { MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
563
+
564
+ /* Back references. */
565
+ { MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
566
+ { CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
567
+ { CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" },
568
+ { MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
569
+ { MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
570
+ { MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
571
+ { MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
572
+ { MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
573
+ { MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" },
574
+ { CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
575
+ { MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
576
+ { CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
577
+ { MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
578
+ { CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
579
+ { MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
580
+ { MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
581
+ { M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
582
+ { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
583
+ { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
584
+ { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
585
+ { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
586
+ { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
587
+ { CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
588
+ { MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
589
+ { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
590
+ { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
591
+ { MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
592
+ { MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
593
+ { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
594
+ { MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
595
+ { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
596
+ { MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
597
+ { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
598
+ { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
599
+ { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
600
+ { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
601
+ { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
602
+ { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
603
+ { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
604
+ { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" },
605
+ { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\1+?()", "" },
606
+
607
+ /* Assertions. */
608
+ { MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
609
+ { MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
610
+ { MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
611
+ { MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
612
+ { MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
613
+ { M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
614
+ { M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
615
+ { MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
616
+ { MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
617
+ { MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
618
+ { MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
619
+ { MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" },
620
+ { MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" },
621
+ { MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
622
+ { MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
623
+ { MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
624
+ { MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
625
+ { MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
626
+ { MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
627
+ { MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
628
+ { MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
629
+ { MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
630
+ { MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
631
+ { MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
632
+ { MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" },
633
+ { MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" },
634
+ { MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" },
635
+ { MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" },
636
+ { MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" },
637
+ { MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" },
638
+ { MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
639
+ { MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
640
+ { MU, A, 0, 0, "a(?=)b", "ab" },
641
+ { MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" },
642
+ { MU, A, 0, 0, "(?(?<!|(|a)))", "a" },
643
+
644
+ /* Not empty, ACCEPT, FAIL */
645
+ { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
646
+ { MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" },
647
+ { MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" },
648
+ { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
649
+ { MU, A, 0, 0, "a(*ACCEPT)b", "ab" },
650
+ { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
651
+ { MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
652
+ { MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
653
+ { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
654
+ { MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
655
+ { MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
656
+ { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
657
+ { MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
658
+ { MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" },
659
+ { MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" },
660
+ { MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" },
661
+ { MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
662
+ { MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
663
+ { MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
664
+ { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
665
+ { MU | PCRE2_ENDANCHORED, A, 0, 0, "aa(*ACCEPT)aa", "aaa" },
666
+
667
+ /* Conditional blocks. */
668
+ { MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
669
+ { MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
670
+ { MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
671
+ { MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
672
+ { MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
673
+ { MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
674
+ { MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
675
+ { MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
676
+ { MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
677
+ { MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
678
+ { MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
679
+ { MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
680
+ { MU, A, 0, 0, "(?(?=a)ab)", "a" },
681
+ { MU, A, 0, 0, "(?(?<!b)c)", "b" },
682
+ { MU, A, 0, 0, "(?(DEFINE)a(b))", "a" },
683
+ { MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
684
+ { MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
685
+ { MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
686
+ { MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
687
+ { MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
688
+ { MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
689
+ { MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" },
690
+ { MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
691
+ { MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
692
+ { MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
693
+ { MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
694
+ { MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
695
+ { MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
696
+ { MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
697
+ { MU, A, 0, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
698
+ { MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
699
+ { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
700
+ { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
701
+ { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
702
+ { MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
703
+ { MU, A, 0, 0, "(?(?!)a|b)", "ab" },
704
+ { MU, A, 0, 0, "(?(?!)a)", "ab" },
705
+ { MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
706
+
707
+ /* Set start of match. */
708
+ { MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
709
+ { MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
710
+ { MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
711
+ { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
712
+ { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
713
+
714
+ /* First line. */
715
+ { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
716
+ { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
717
+ { MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" },
718
+ { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" },
719
+ { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" },
720
+ { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" },
721
+ { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" },
722
+ { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
723
+ { MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
724
+ { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" },
725
+ { M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" },
726
+ { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
727
+ { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
728
+ { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
729
+ { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" },
730
+ { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
731
+ { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
732
+ { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
733
+ { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
734
+ { MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
735
+ { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
736
+ { MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" },
737
+
738
+ /* Recurse. */
739
+ { MU, A, 0, 0, "(a)(?1)", "aa" },
740
+ { MU, A, 0, 0, "((a))(?1)", "aa" },
741
+ { MU, A, 0, 0, "(b|a)(?1)", "aa" },
742
+ { MU, A, 0, 0, "(b|(a))(?1)", "aa" },
743
+ { MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
744
+ { MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" },
745
+ { MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
746
+ { MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" },
747
+ { MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
748
+ { MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
749
+ { MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
750
+ { MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
751
+ { MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
752
+ { MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
753
+ { MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
754
+ { MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
755
+ { MU, A, 0, 0, "b|<(?R)*>", "<<b>" },
756
+ { MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
757
+ { MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
758
+ { MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
759
+ { MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
760
+ { MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
761
+ { MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
762
+ { MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
763
+ { MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
764
+ { MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
765
+ { MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
766
+ { MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
767
+ { MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
768
+ { MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
769
+ { MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
770
+ { MU, A, 0, 0 | F_NOMATCH, "(?1)$((.|\\2xx){1,2})", "abc" },
771
+
772
+ /* 16 bit specific tests. */
773
+ { CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
774
+ { CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
775
+ { CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
776
+ { CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
777
+ { CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
778
+ { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
779
+ { CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
780
+ { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
781
+ { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
782
+ { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
783
+ { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
784
+ { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
785
+ { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
786
+ { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
787
+ { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
788
+ { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
789
+ { M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
790
+ { M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
791
+ { CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
792
+ { CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
793
+ { CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
794
+ { CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
795
+ { CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
796
+ { CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
797
+ { CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
798
+ { M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
799
+ { 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
800
+ { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
801
+ { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
802
+ { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
803
+ { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
804
+
805
+ /* Partial matching. */
806
+ { MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" },
807
+ { MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" },
808
+ { MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" },
809
+ { MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" },
810
+ { MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
811
+ { MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
812
+ { MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" },
813
+ { MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" },
814
+
815
+ /* (*MARK) verb. */
816
+ { MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" },
817
+ { MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
818
+ { MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
819
+ { MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
820
+ { MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" },
821
+ { MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
822
+ { MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
823
+ { MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
824
+ { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
825
+ { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
826
+ { MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
827
+ { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
828
+ { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
829
+ { MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
830
+ { MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" },
831
+
832
+ /* (*COMMIT) verb. */
833
+ { MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
834
+ { MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" },
835
+ { MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
836
+ { MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
837
+ { MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
838
+ { MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
839
+
840
+ /* (*PRUNE) verb. */
841
+ { MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" },
842
+ { MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" },
843
+ { MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
844
+ { MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
845
+ { MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
846
+ { MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
847
+ { MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
848
+ { MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
849
+ { MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
850
+ { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
851
+ { MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
852
+ { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
853
+ { MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
854
+ { MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
855
+ { MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
856
+ { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
857
+ { MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
858
+ { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
859
+ { MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
860
+ { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
861
+ { MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
862
+ { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
863
+ { MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
864
+ { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
865
+ { MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
866
+ { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
867
+ { MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
868
+ { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
869
+ { MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
870
+ { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
871
+
872
+ /* (*SKIP) verb. */
873
+ { MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
874
+ { MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
875
+ { MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
876
+ { MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
877
+
878
+ /* (*THEN) verb. */
879
+ { MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
880
+ { MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
881
+ { MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
882
+ { MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
883
+ { MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
884
+ { MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
885
+ { MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
886
+ { MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
887
+ { MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
888
+ { MU, A, 0, 0, "(?=(*THEN: ))* ", " " },
889
+ { MU, A, 0, 0, "a(*THEN)(?R) |", "a" },
890
+ { MU, A, 0, 0 | F_NOMATCH, "(?<!(*THEN)a|(*THEN)b|(*THEN)ab?|(*THEN)ba?|)", "c" },
891
+
892
+ /* Recurse and control verbs. */
893
+ { MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
894
+ { MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
895
+ { MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
896
+ { MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
897
+ { MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
898
+ { MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
899
+ { MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
900
+ { MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
901
+
902
+ #ifdef SUPPORT_UNICODE
903
+ /* Script runs and iterations. */
904
+ { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
905
+ { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
906
+ { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
907
+ { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
908
+ { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
909
+ { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)++#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
910
+ { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)?#", "!ab!abc!ab!ab#" },
911
+ { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)??#", "!ab!abc!ab!ab#" },
912
+ #endif /* SUPPORT_UNICODE */
913
+
914
+ /* Deep recursion. */
915
+ { MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
916
+ { MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
917
+ { MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
918
+
919
+ /* Deep recursion: Stack limit reached. */
920
+ { M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
921
+ { M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
922
+ { M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
923
+ { M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
924
+ { M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
925
+
926
+ { 0, 0, 0, 0, NULL, NULL }
927
+ };
928
+
929
+ #ifdef SUPPORT_PCRE2_8
930
+ static pcre2_jit_stack_8* callback8(void *arg)
931
+ {
932
+ return (pcre2_jit_stack_8 *)arg;
933
+ }
934
+ #endif
935
+
936
+ #ifdef SUPPORT_PCRE2_16
937
+ static pcre2_jit_stack_16* callback16(void *arg)
938
+ {
939
+ return (pcre2_jit_stack_16 *)arg;
940
+ }
941
+ #endif
942
+
943
+ #ifdef SUPPORT_PCRE2_32
944
+ static pcre2_jit_stack_32* callback32(void *arg)
945
+ {
946
+ return (pcre2_jit_stack_32 *)arg;
947
+ }
948
+ #endif
949
+
950
+ #ifdef SUPPORT_PCRE2_8
951
+ static pcre2_jit_stack_8 *stack8;
952
+
953
+ static pcre2_jit_stack_8 *getstack8(void)
954
+ {
955
+ if (!stack8)
956
+ stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
957
+ return stack8;
958
+ }
959
+
960
+ static void setstack8(pcre2_match_context_8 *mcontext)
961
+ {
962
+ if (!mcontext) {
963
+ if (stack8)
964
+ pcre2_jit_stack_free_8(stack8);
965
+ stack8 = NULL;
966
+ return;
967
+ }
968
+
969
+ pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
970
+ }
971
+ #endif /* SUPPORT_PCRE2_8 */
972
+
973
+ #ifdef SUPPORT_PCRE2_16
974
+ static pcre2_jit_stack_16 *stack16;
975
+
976
+ static pcre2_jit_stack_16 *getstack16(void)
977
+ {
978
+ if (!stack16)
979
+ stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
980
+ return stack16;
981
+ }
982
+
983
+ static void setstack16(pcre2_match_context_16 *mcontext)
984
+ {
985
+ if (!mcontext) {
986
+ if (stack16)
987
+ pcre2_jit_stack_free_16(stack16);
988
+ stack16 = NULL;
989
+ return;
990
+ }
991
+
992
+ pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
993
+ }
994
+ #endif /* SUPPORT_PCRE2_16 */
995
+
996
+ #ifdef SUPPORT_PCRE2_32
997
+ static pcre2_jit_stack_32 *stack32;
998
+
999
+ static pcre2_jit_stack_32 *getstack32(void)
1000
+ {
1001
+ if (!stack32)
1002
+ stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
1003
+ return stack32;
1004
+ }
1005
+
1006
+ static void setstack32(pcre2_match_context_32 *mcontext)
1007
+ {
1008
+ if (!mcontext) {
1009
+ if (stack32)
1010
+ pcre2_jit_stack_free_32(stack32);
1011
+ stack32 = NULL;
1012
+ return;
1013
+ }
1014
+
1015
+ pcre2_jit_stack_assign_32(mcontext, callback32, getstack32());
1016
+ }
1017
+ #endif /* SUPPORT_PCRE2_32 */
1018
+
1019
+ #ifdef SUPPORT_PCRE2_16
1020
+
1021
+ static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length)
1022
+ {
1023
+ PCRE2_SPTR8 iptr = input;
1024
+ PCRE2_UCHAR16 *optr = output;
1025
+ unsigned int c;
1026
+
1027
+ if (max_length == 0)
1028
+ return 0;
1029
+
1030
+ while (*iptr && max_length > 1) {
1031
+ c = 0;
1032
+ if (offsetmap)
1033
+ *offsetmap++ = (int)(iptr - (unsigned char*)input);
1034
+
1035
+ if (*iptr < 0xc0)
1036
+ c = *iptr++;
1037
+ else if (!(*iptr & 0x20)) {
1038
+ c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1039
+ iptr += 2;
1040
+ } else if (!(*iptr & 0x10)) {
1041
+ c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1042
+ iptr += 3;
1043
+ } else if (!(*iptr & 0x08)) {
1044
+ c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1045
+ iptr += 4;
1046
+ }
1047
+
1048
+ if (c < 65536) {
1049
+ *optr++ = c;
1050
+ max_length--;
1051
+ } else if (max_length <= 2) {
1052
+ *optr = '\0';
1053
+ return (int)(optr - output);
1054
+ } else {
1055
+ c -= 0x10000;
1056
+ *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
1057
+ *optr++ = 0xdc00 | (c & 0x3ff);
1058
+ max_length -= 2;
1059
+ if (offsetmap)
1060
+ offsetmap++;
1061
+ }
1062
+ }
1063
+ if (offsetmap)
1064
+ *offsetmap = (int)(iptr - (unsigned char*)input);
1065
+ *optr = '\0';
1066
+ return (int)(optr - output);
1067
+ }
1068
+
1069
+ static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length)
1070
+ {
1071
+ PCRE2_SPTR8 iptr = input;
1072
+ PCRE2_UCHAR16 *optr = output;
1073
+
1074
+ if (max_length == 0)
1075
+ return 0;
1076
+
1077
+ while (*iptr && max_length > 1) {
1078
+ *optr++ = *iptr++;
1079
+ max_length--;
1080
+ }
1081
+ *optr = '\0';
1082
+ return (int)(optr - output);
1083
+ }
1084
+
1085
+ #define REGTEST_MAX_LENGTH16 4096
1086
+ static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
1087
+ static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1088
+
1089
+ #endif /* SUPPORT_PCRE2_16 */
1090
+
1091
+ #ifdef SUPPORT_PCRE2_32
1092
+
1093
+ static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length)
1094
+ {
1095
+ PCRE2_SPTR8 iptr = input;
1096
+ PCRE2_UCHAR32 *optr = output;
1097
+ unsigned int c;
1098
+
1099
+ if (max_length == 0)
1100
+ return 0;
1101
+
1102
+ while (*iptr && max_length > 1) {
1103
+ c = 0;
1104
+ if (offsetmap)
1105
+ *offsetmap++ = (int)(iptr - (unsigned char*)input);
1106
+
1107
+ if (*iptr < 0xc0)
1108
+ c = *iptr++;
1109
+ else if (!(*iptr & 0x20)) {
1110
+ c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1111
+ iptr += 2;
1112
+ } else if (!(*iptr & 0x10)) {
1113
+ c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1114
+ iptr += 3;
1115
+ } else if (!(*iptr & 0x08)) {
1116
+ c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1117
+ iptr += 4;
1118
+ }
1119
+
1120
+ *optr++ = c;
1121
+ max_length--;
1122
+ }
1123
+ if (offsetmap)
1124
+ *offsetmap = (int)(iptr - (unsigned char*)input);
1125
+ *optr = 0;
1126
+ return (int)(optr - output);
1127
+ }
1128
+
1129
+ static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length)
1130
+ {
1131
+ PCRE2_SPTR8 iptr = input;
1132
+ PCRE2_UCHAR32 *optr = output;
1133
+
1134
+ if (max_length == 0)
1135
+ return 0;
1136
+
1137
+ while (*iptr && max_length > 1) {
1138
+ *optr++ = *iptr++;
1139
+ max_length--;
1140
+ }
1141
+ *optr = '\0';
1142
+ return (int)(optr - output);
1143
+ }
1144
+
1145
+ #define REGTEST_MAX_LENGTH32 4096
1146
+ static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
1147
+ static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1148
+
1149
+ #endif /* SUPPORT_PCRE2_32 */
1150
+
1151
/* Reports whether a NUL-terminated string consists solely of 7-bit bytes.
   Returns 1 when every byte before the terminator is <= 127 (an empty string
   qualifies), and 0 as soon as a byte above 127 is found. The bytes are
   examined as unsigned char so the result does not depend on whether plain
   char is signed. */
static int check_ascii(const char *input)
{
	const unsigned char *ptr;

	for (ptr = (const unsigned char *)input; *ptr != 0; ptr++) {
		if (*ptr > 127)
			return 0;
	}
	return 1;
}
1161
+
1162
+ #define OVECTOR_SIZE 15
1163
+
1164
+ static int regression_tests(void)
1165
+ {
1166
+ struct regression_test_case *current = regression_test_cases;
1167
+ int error;
1168
+ PCRE2_SIZE err_offs;
1169
+ int is_successful;
1170
+ int is_ascii;
1171
+ int total = 0;
1172
+ int successful = 0;
1173
+ int successful_row = 0;
1174
+ int counter = 0;
1175
+ int jit_compile_mode;
1176
+ int utf = 0;
1177
+ uint32_t disabled_options = 0;
1178
+ int i;
1179
+ #ifdef SUPPORT_PCRE2_8
1180
+ pcre2_code_8 *re8;
1181
+ pcre2_compile_context_8 *ccontext8;
1182
+ pcre2_match_data_8 *mdata8_1;
1183
+ pcre2_match_data_8 *mdata8_2;
1184
+ pcre2_match_context_8 *mcontext8;
1185
+ PCRE2_SIZE *ovector8_1 = NULL;
1186
+ PCRE2_SIZE *ovector8_2 = NULL;
1187
+ int return_value8[2];
1188
+ #endif
1189
+ #ifdef SUPPORT_PCRE2_16
1190
+ pcre2_code_16 *re16;
1191
+ pcre2_compile_context_16 *ccontext16;
1192
+ pcre2_match_data_16 *mdata16_1;
1193
+ pcre2_match_data_16 *mdata16_2;
1194
+ pcre2_match_context_16 *mcontext16;
1195
+ PCRE2_SIZE *ovector16_1 = NULL;
1196
+ PCRE2_SIZE *ovector16_2 = NULL;
1197
+ int return_value16[2];
1198
+ int length16;
1199
+ #endif
1200
+ #ifdef SUPPORT_PCRE2_32
1201
+ pcre2_code_32 *re32;
1202
+ pcre2_compile_context_32 *ccontext32;
1203
+ pcre2_match_data_32 *mdata32_1;
1204
+ pcre2_match_data_32 *mdata32_2;
1205
+ pcre2_match_context_32 *mcontext32;
1206
+ PCRE2_SIZE *ovector32_1 = NULL;
1207
+ PCRE2_SIZE *ovector32_2 = NULL;
1208
+ int return_value32[2];
1209
+ int length32;
1210
+ #endif
1211
+
1212
+ #if defined SUPPORT_PCRE2_8
1213
+ PCRE2_UCHAR8 cpu_info[128];
1214
+ #elif defined SUPPORT_PCRE2_16
1215
+ PCRE2_UCHAR16 cpu_info[128];
1216
+ #elif defined SUPPORT_PCRE2_32
1217
+ PCRE2_UCHAR32 cpu_info[128];
1218
+ #endif
1219
+ #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1220
+ int return_value;
1221
+ #endif
1222
+
1223
+ /* This test compares the behaviour of interpreter and JIT. Although disabling
1224
+ utf or ucp may make tests fail, if the pcre2_match result is the SAME, it is
1225
+ still considered successful from pcre2_jit_test point of view. */
1226
+
1227
+ #if defined SUPPORT_PCRE2_8
1228
+ pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info);
1229
+ #elif defined SUPPORT_PCRE2_16
1230
+ pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info);
1231
+ #elif defined SUPPORT_PCRE2_32
1232
+ pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info);
1233
+ #endif
1234
+
1235
+ printf("Running JIT regression tests\n");
1236
+ printf(" target CPU of SLJIT compiler: ");
1237
+ for (i = 0; cpu_info[i]; i++)
1238
+ printf("%c", (char)(cpu_info[i]));
1239
+ printf("\n");
1240
+
1241
+ #if defined SUPPORT_PCRE2_8
1242
+ pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf);
1243
+ #elif defined SUPPORT_PCRE2_16
1244
+ pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf);
1245
+ #elif defined SUPPORT_PCRE2_32
1246
+ pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf);
1247
+ #endif
1248
+
1249
+ if (!utf)
1250
+ disabled_options |= PCRE2_UTF;
1251
+ #ifdef SUPPORT_PCRE2_8
1252
+ printf(" in 8 bit mode with UTF-8 %s:\n", utf ? "enabled" : "disabled");
1253
+ #endif
1254
+ #ifdef SUPPORT_PCRE2_16
1255
+ printf(" in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled");
1256
+ #endif
1257
+ #ifdef SUPPORT_PCRE2_32
1258
+ printf(" in 32 bit mode with UTF-32 %s:\n", utf ? "enabled" : "disabled");
1259
+ #endif
1260
+
1261
+ while (current->pattern) {
1262
+ /* printf("\nPattern: %s :\n", current->pattern); */
1263
+ total++;
1264
+ is_ascii = 0;
1265
+ if (!(current->start_offset & F_PROPERTY))
1266
+ is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1267
+
1268
+ if (current->match_options & PCRE2_PARTIAL_SOFT)
1269
+ jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT;
1270
+ else if (current->match_options & PCRE2_PARTIAL_HARD)
1271
+ jit_compile_mode = PCRE2_JIT_PARTIAL_HARD;
1272
+ else
1273
+ jit_compile_mode = PCRE2_JIT_COMPLETE;
1274
+ error = 0;
1275
+ #ifdef SUPPORT_PCRE2_8
1276
+ re8 = NULL;
1277
+ ccontext8 = pcre2_compile_context_create_8(NULL);
1278
+ if (ccontext8) {
1279
+ if (GET_NEWLINE(current->newline))
1280
+ pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline));
1281
+ if (GET_BSR(current->newline))
1282
+ pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline));
1283
+
1284
+ if (!(current->start_offset & F_NO8)) {
1285
+ re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED,
1286
+ current->compile_options & ~disabled_options,
1287
+ &error, &err_offs, ccontext8);
1288
+
1289
+ if (!re8 && (utf || is_ascii))
1290
+ printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1291
+ }
1292
+ pcre2_compile_context_free_8(ccontext8);
1293
+ }
1294
+ else
1295
+ printf("\n8 bit: Cannot allocate compile context\n");
1296
+ #endif
1297
+ #ifdef SUPPORT_PCRE2_16
1298
+ if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1299
+ convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1300
+ else
1301
+ copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1302
+
1303
+ re16 = NULL;
1304
+ ccontext16 = pcre2_compile_context_create_16(NULL);
1305
+ if (ccontext16) {
1306
+ if (GET_NEWLINE(current->newline))
1307
+ pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline));
1308
+ if (GET_BSR(current->newline))
1309
+ pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline));
1310
+
1311
+ if (!(current->start_offset & F_NO16)) {
1312
+ re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED,
1313
+ current->compile_options & ~disabled_options,
1314
+ &error, &err_offs, ccontext16);
1315
+
1316
+ if (!re16 && (utf || is_ascii))
1317
+ printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1318
+ }
1319
+ pcre2_compile_context_free_16(ccontext16);
1320
+ }
1321
+ else
1322
+ printf("\n16 bit: Cannot allocate compile context\n");
1323
+ #endif
1324
+ #ifdef SUPPORT_PCRE2_32
1325
+ if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1326
+ convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1327
+ else
1328
+ copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1329
+
1330
+ re32 = NULL;
1331
+ ccontext32 = pcre2_compile_context_create_32(NULL);
1332
+ if (ccontext32) {
1333
+ if (GET_NEWLINE(current->newline))
1334
+ pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline));
1335
+ if (GET_BSR(current->newline))
1336
+ pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline));
1337
+
1338
+ if (!(current->start_offset & F_NO32)) {
1339
+ re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED,
1340
+ current->compile_options & ~disabled_options,
1341
+ &error, &err_offs, ccontext32);
1342
+
1343
+ if (!re32 && (utf || is_ascii))
1344
+ printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1345
+ }
1346
+ pcre2_compile_context_free_32(ccontext32);
1347
+ }
1348
+ else
1349
+ printf("\n32 bit: Cannot allocate compile context\n");
1350
+ #endif
1351
+
1352
+ counter++;
1353
+ if ((counter & 0x3) != 0) {
1354
+ #ifdef SUPPORT_PCRE2_8
1355
+ setstack8(NULL);
1356
+ #endif
1357
+ #ifdef SUPPORT_PCRE2_16
1358
+ setstack16(NULL);
1359
+ #endif
1360
+ #ifdef SUPPORT_PCRE2_32
1361
+ setstack32(NULL);
1362
+ #endif
1363
+ }
1364
+
1365
+ #ifdef SUPPORT_PCRE2_8
1366
+ return_value8[0] = -1000;
1367
+ return_value8[1] = -1000;
1368
+ mdata8_1 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1369
+ mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1370
+ mcontext8 = pcre2_match_context_create_8(NULL);
1371
+ if (!mdata8_1 || !mdata8_2 || !mcontext8) {
1372
+ printf("\n8 bit: Cannot allocate match data\n");
1373
+ pcre2_match_data_free_8(mdata8_1);
1374
+ pcre2_match_data_free_8(mdata8_2);
1375
+ pcre2_match_context_free_8(mcontext8);
1376
+ pcre2_code_free_8(re8);
1377
+ re8 = NULL;
1378
+ } else {
1379
+ ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
1380
+ ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
1381
+ for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1382
+ ovector8_1[i] = (PCRE2_SIZE)(-2);
1383
+ for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1384
+ ovector8_2[i] = (PCRE2_SIZE)(-2);
1385
+ pcre2_set_match_limit_8(mcontext8, 10000000);
1386
+ }
1387
+ if (re8) {
1388
+ return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1389
+ current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
1390
+
1391
+ if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
1392
+ printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1393
+ } else if ((counter & 0x1) != 0) {
1394
+ setstack8(mcontext8);
1395
+ return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1396
+ current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1397
+ } else {
1398
+ pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8());
1399
+ return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1400
+ current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1401
+ }
1402
+ }
1403
+ #endif
1404
+
1405
+ #ifdef SUPPORT_PCRE2_16
1406
+ return_value16[0] = -1000;
1407
+ return_value16[1] = -1000;
1408
+ mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1409
+ mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1410
+ mcontext16 = pcre2_match_context_create_16(NULL);
1411
+ if (!mdata16_1 || !mdata16_2 || !mcontext16) {
1412
+ printf("\n16 bit: Cannot allocate match data\n");
1413
+ pcre2_match_data_free_16(mdata16_1);
1414
+ pcre2_match_data_free_16(mdata16_2);
1415
+ pcre2_match_context_free_16(mcontext16);
1416
+ pcre2_code_free_16(re16);
1417
+ re16 = NULL;
1418
+ } else {
1419
+ ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
1420
+ ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
1421
+ for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1422
+ ovector16_1[i] = (PCRE2_SIZE)(-2);
1423
+ for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1424
+ ovector16_2[i] = (PCRE2_SIZE)(-2);
1425
+ pcre2_set_match_limit_16(mcontext16, 10000000);
1426
+ }
1427
+ if (re16) {
1428
+ if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1429
+ length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1430
+ else
1431
+ length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1432
+
1433
+ return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
1434
+ current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
1435
+
1436
+ if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
1437
+ printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1438
+ } else if ((counter & 0x1) != 0) {
1439
+ setstack16(mcontext16);
1440
+ return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16,
1441
+ current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1442
+ } else {
1443
+ pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16());
1444
+ return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16,
1445
+ current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1446
+ }
1447
+ }
1448
+ #endif
1449
+
1450
+ #ifdef SUPPORT_PCRE2_32
1451
+ return_value32[0] = -1000;
1452
+ return_value32[1] = -1000;
1453
+ mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1454
+ mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1455
+ mcontext32 = pcre2_match_context_create_32(NULL);
1456
+ if (!mdata32_1 || !mdata32_2 || !mcontext32) {
1457
+ printf("\n32 bit: Cannot allocate match data\n");
1458
+ pcre2_match_data_free_32(mdata32_1);
1459
+ pcre2_match_data_free_32(mdata32_2);
1460
+ pcre2_match_context_free_32(mcontext32);
1461
+ pcre2_code_free_32(re32);
1462
+ re32 = NULL;
1463
+ } else {
1464
+ ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
1465
+ ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
1466
+ for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1467
+ ovector32_1[i] = (PCRE2_SIZE)(-2);
1468
+ for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1469
+ ovector32_2[i] = (PCRE2_SIZE)(-2);
1470
+ pcre2_set_match_limit_32(mcontext32, 10000000);
1471
+ }
1472
+ if (re32) {
1473
+ if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1474
+ length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1475
+ else
1476
+ length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1477
+
1478
+ return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
1479
+ current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
1480
+
1481
+ if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
1482
+ printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1483
+ } else if ((counter & 0x1) != 0) {
1484
+ setstack32(mcontext32);
1485
+ return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32,
1486
+ current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1487
+ } else {
1488
+ pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32());
1489
+ return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32,
1490
+ current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1491
+ }
1492
+ }
1493
+ #endif
1494
+
1495
+ /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1496
+ return_value8[0], return_value16[0], return_value32[0],
1497
+ (int)ovector8_1[0], (int)ovector8_1[1],
1498
+ (int)ovector16_1[0], (int)ovector16_1[1],
1499
+ (int)ovector32_1[0], (int)ovector32_1[1],
1500
+ (current->compile_options & PCRE2_CASELESS) ? "C" : ""); */
1501
+
1502
+ /* If F_DIFF is set, just run the test, but do not compare the results.
1503
+ Segfaults can still be captured. */
1504
+
1505
+ is_successful = 1;
1506
+ if (!(current->start_offset & F_DIFF)) {
1507
+ #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1508
+ if (!(current->start_offset & F_FORCECONV)) {
1509
+
1510
+ /* All results must be the same. */
1511
+ #ifdef SUPPORT_PCRE2_8
1512
+ if ((return_value = return_value8[0]) != return_value8[1]) {
1513
+ printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1514
+ return_value8[0], return_value8[1], total, current->pattern, current->input);
1515
+ is_successful = 0;
1516
+ } else
1517
+ #endif
1518
+ #ifdef SUPPORT_PCRE2_16
1519
+ if ((return_value = return_value16[0]) != return_value16[1]) {
1520
+ printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1521
+ return_value16[0], return_value16[1], total, current->pattern, current->input);
1522
+ is_successful = 0;
1523
+ } else
1524
+ #endif
1525
+ #ifdef SUPPORT_PCRE2_32
1526
+ if ((return_value = return_value32[0]) != return_value32[1]) {
1527
+ printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1528
+ return_value32[0], return_value32[1], total, current->pattern, current->input);
1529
+ is_successful = 0;
1530
+ } else
1531
+ #endif
1532
+ #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1533
+ if (return_value8[0] != return_value16[0]) {
1534
+ printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1535
+ return_value8[0], return_value16[0],
1536
+ total, current->pattern, current->input);
1537
+ is_successful = 0;
1538
+ } else
1539
+ #endif
1540
+ #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1541
+ if (return_value8[0] != return_value32[0]) {
1542
+ printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1543
+ return_value8[0], return_value32[0],
1544
+ total, current->pattern, current->input);
1545
+ is_successful = 0;
1546
+ } else
1547
+ #endif
1548
+ #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1549
+ if (return_value16[0] != return_value32[0]) {
1550
+ printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1551
+ return_value16[0], return_value32[0],
1552
+ total, current->pattern, current->input);
1553
+ is_successful = 0;
1554
+ } else
1555
+ #endif
1556
+ if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) {
1557
+ if (return_value == PCRE2_ERROR_PARTIAL) {
1558
+ return_value = 2;
1559
+ } else {
1560
+ return_value *= 2;
1561
+ }
1562
+ #ifdef SUPPORT_PCRE2_8
1563
+ return_value8[0] = return_value;
1564
+ #endif
1565
+ #ifdef SUPPORT_PCRE2_16
1566
+ return_value16[0] = return_value;
1567
+ #endif
1568
+ #ifdef SUPPORT_PCRE2_32
1569
+ return_value32[0] = return_value;
1570
+ #endif
1571
+ /* Transform back the results. */
1572
+ if (current->compile_options & PCRE2_UTF) {
1573
+ #ifdef SUPPORT_PCRE2_16
1574
+ for (i = 0; i < return_value; ++i) {
1575
+ if (ovector16_1[i] != PCRE2_UNSET)
1576
+ ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1577
+ if (ovector16_2[i] != PCRE2_UNSET)
1578
+ ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1579
+ }
1580
+ #endif
1581
+ #ifdef SUPPORT_PCRE2_32
1582
+ for (i = 0; i < return_value; ++i) {
1583
+ if (ovector32_1[i] != PCRE2_UNSET)
1584
+ ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1585
+ if (ovector32_2[i] != PCRE2_UNSET)
1586
+ ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1587
+ }
1588
+ #endif
1589
+ }
1590
+
1591
+ for (i = 0; i < return_value; ++i) {
1592
+ #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1593
+ if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1594
+ printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1595
+ i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
1596
+ total, current->pattern, current->input);
1597
+ is_successful = 0;
1598
+ }
1599
+ #endif
1600
+ #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1601
+ if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1602
+ printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1603
+ i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1604
+ total, current->pattern, current->input);
1605
+ is_successful = 0;
1606
+ }
1607
+ #endif
1608
+ #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1609
+ if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
1610
+ printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1611
+ i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1612
+ total, current->pattern, current->input);
1613
+ is_successful = 0;
1614
+ }
1615
+ #endif
1616
+ }
1617
+ }
1618
+ } else
1619
+ #endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */
1620
+ {
1621
+ #ifdef SUPPORT_PCRE2_8
1622
+ if (return_value8[0] != return_value8[1]) {
1623
+ printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1624
+ return_value8[0], return_value8[1], total, current->pattern, current->input);
1625
+ is_successful = 0;
1626
+ } else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) {
1627
+ if (return_value8[0] == PCRE2_ERROR_PARTIAL)
1628
+ return_value8[0] = 2;
1629
+ else
1630
+ return_value8[0] *= 2;
1631
+
1632
+ for (i = 0; i < return_value8[0]; ++i)
1633
+ if (ovector8_1[i] != ovector8_2[i]) {
1634
+ printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1635
+ i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input);
1636
+ is_successful = 0;
1637
+ }
1638
+ }
1639
+ #endif
1640
+
1641
+ #ifdef SUPPORT_PCRE2_16
1642
+ if (return_value16[0] != return_value16[1]) {
1643
+ printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1644
+ return_value16[0], return_value16[1], total, current->pattern, current->input);
1645
+ is_successful = 0;
1646
+ } else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) {
1647
+ if (return_value16[0] == PCRE2_ERROR_PARTIAL)
1648
+ return_value16[0] = 2;
1649
+ else
1650
+ return_value16[0] *= 2;
1651
+
1652
+ for (i = 0; i < return_value16[0]; ++i)
1653
+ if (ovector16_1[i] != ovector16_2[i]) {
1654
+ printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1655
+ i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input);
1656
+ is_successful = 0;
1657
+ }
1658
+ }
1659
+ #endif
1660
+
1661
+ #ifdef SUPPORT_PCRE2_32
1662
+ if (return_value32[0] != return_value32[1]) {
1663
+ printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1664
+ return_value32[0], return_value32[1], total, current->pattern, current->input);
1665
+ is_successful = 0;
1666
+ } else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) {
1667
+ if (return_value32[0] == PCRE2_ERROR_PARTIAL)
1668
+ return_value32[0] = 2;
1669
+ else
1670
+ return_value32[0] *= 2;
1671
+
1672
+ for (i = 0; i < return_value32[0]; ++i)
1673
+ if (ovector32_1[i] != ovector32_2[i]) {
1674
+ printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1675
+ i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input);
1676
+ is_successful = 0;
1677
+ }
1678
+ }
1679
+ #endif
1680
+ }
1681
+ }
1682
+
1683
+ if (is_successful) {
1684
+ #ifdef SUPPORT_PCRE2_8
1685
+ if (!(current->start_offset & F_NO8) && (utf || is_ascii)) {
1686
+ if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1687
+ printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1688
+ total, current->pattern, current->input);
1689
+ is_successful = 0;
1690
+ }
1691
+
1692
+ if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1693
+ printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1694
+ total, current->pattern, current->input);
1695
+ is_successful = 0;
1696
+ }
1697
+ }
1698
+ #endif
1699
+ #ifdef SUPPORT_PCRE2_16
1700
+ if (!(current->start_offset & F_NO16) && (utf || is_ascii)) {
1701
+ if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1702
+ printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1703
+ total, current->pattern, current->input);
1704
+ is_successful = 0;
1705
+ }
1706
+
1707
+ if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1708
+ printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1709
+ total, current->pattern, current->input);
1710
+ is_successful = 0;
1711
+ }
1712
+ }
1713
+ #endif
1714
+ #ifdef SUPPORT_PCRE2_32
1715
+ if (!(current->start_offset & F_NO32) && (utf || is_ascii)) {
1716
+ if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1717
+ printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1718
+ total, current->pattern, current->input);
1719
+ is_successful = 0;
1720
+ }
1721
+
1722
+ if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1723
+ printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1724
+ total, current->pattern, current->input);
1725
+ is_successful = 0;
1726
+ }
1727
+ }
1728
+ #endif
1729
+ }
1730
+
1731
+ if (is_successful) {
1732
+ #ifdef SUPPORT_PCRE2_8
1733
+ if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) {
1734
+ printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1735
+ total, current->pattern, current->input);
1736
+ is_successful = 0;
1737
+ }
1738
+ #endif
1739
+ #ifdef SUPPORT_PCRE2_16
1740
+ if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) {
1741
+ printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1742
+ total, current->pattern, current->input);
1743
+ is_successful = 0;
1744
+ }
1745
+ #endif
1746
+ #ifdef SUPPORT_PCRE2_32
1747
+ if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) {
1748
+ printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1749
+ total, current->pattern, current->input);
1750
+ is_successful = 0;
1751
+ }
1752
+ #endif
1753
+ }
1754
+
1755
+ #ifdef SUPPORT_PCRE2_8
1756
+ pcre2_code_free_8(re8);
1757
+ pcre2_match_data_free_8(mdata8_1);
1758
+ pcre2_match_data_free_8(mdata8_2);
1759
+ pcre2_match_context_free_8(mcontext8);
1760
+ #endif
1761
+ #ifdef SUPPORT_PCRE2_16
1762
+ pcre2_code_free_16(re16);
1763
+ pcre2_match_data_free_16(mdata16_1);
1764
+ pcre2_match_data_free_16(mdata16_2);
1765
+ pcre2_match_context_free_16(mcontext16);
1766
+ #endif
1767
+ #ifdef SUPPORT_PCRE2_32
1768
+ pcre2_code_free_32(re32);
1769
+ pcre2_match_data_free_32(mdata32_1);
1770
+ pcre2_match_data_free_32(mdata32_2);
1771
+ pcre2_match_context_free_32(mcontext32);
1772
+ #endif
1773
+
1774
+ if (is_successful) {
1775
+ successful++;
1776
+ successful_row++;
1777
+ printf(".");
1778
+ if (successful_row >= 60) {
1779
+ successful_row = 0;
1780
+ printf("\n");
1781
+ }
1782
+ } else
1783
+ successful_row = 0;
1784
+
1785
+ fflush(stdout);
1786
+ current++;
1787
+ }
1788
+ #ifdef SUPPORT_PCRE2_8
1789
+ setstack8(NULL);
1790
+ #endif
1791
+ #ifdef SUPPORT_PCRE2_16
1792
+ setstack16(NULL);
1793
+ #endif
1794
+ #ifdef SUPPORT_PCRE2_32
1795
+ setstack32(NULL);
1796
+ #endif
1797
+
1798
+ if (total == successful) {
1799
+ printf("\nAll JIT regression tests are successfully passed.\n");
1800
+ return 0;
1801
+ } else {
1802
+ printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1803
+ return 1;
1804
+ }
1805
+ }
1806
+
1807
+ #if defined SUPPORT_UNICODE
1808
+
1809
+ static int check_invalid_utf_result(int pattern_index, const char *type, int result,
1810
+ int match_start, int match_end, PCRE2_SIZE *ovector)
1811
+ {
1812
+ if (match_start < 0) {
1813
+ if (result != -1) {
1814
+ printf("Pattern[%d] %s result is not -1.\n", pattern_index, type);
1815
+ return 1;
1816
+ }
1817
+ return 0;
1818
+ }
1819
+
1820
+ if (result <= 0) {
1821
+ printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result);
1822
+ return 1;
1823
+ }
1824
+
1825
+ if (ovector[0] != (PCRE2_SIZE)match_start) {
1826
+ printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
1827
+ pattern_index, type, (int)ovector[0], match_start);
1828
+ return 1;
1829
+ }
1830
+
1831
+ if (ovector[1] != (PCRE2_SIZE)match_end) {
1832
+ printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
1833
+ pattern_index, type, (int)ovector[1], match_end);
1834
+ return 1;
1835
+ }
1836
+
1837
+ return 0;
1838
+ }
1839
+
1840
+ #endif /* SUPPORT_UNICODE */
1841
+
1842
+ #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
1843
+
1844
/* Shorthands for the invalid UTF-8 test table that follows; they are #undef'd right after it.
   UDA is a set of compile options; CI and CPI are JIT compile options, and CPI
   additionally requests PCRE2_JIT_PARTIAL_SOFT. */
+ #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
1845
+ #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
1846
+ #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
1847
+
1848
/* One row of the invalid UTF-8 JIT regression table; rows are executed by run_invalid_utf8_test(). */
+ struct invalid_utf8_regression_test_case {
1849
+ uint32_t compile_options; /* options passed to pcre2_compile_8() */
1850
+ int jit_compile_options; /* options passed to pcre2_jit_compile_8(); its PCRE2_JIT_COMPLETE and PCRE2_JIT_PARTIAL_SOFT bits also select which matches are run */
1851
+ int start_offset; /* start offset counted from the beginning of input; skip_left is subtracted before it is passed to the match */
1852
+ int skip_left; /* number of bytes dropped from the front of input before matching */
1853
+ int skip_right; /* number of bytes dropped from the end of input before matching */
1854
+ int match_start; /* expected ovector[0] (the subject starts at input + skip_left); a negative value means the match must return -1 */
1855
+ int match_end; /* expected ovector[1]; not checked when match_start is negative */
1856
+ const char *pattern[2]; /* patterns to test; a NULL entry is skipped, pattern[1] may also be the invalid_utf8_newline_cr marker, and NULL in pattern[0] ends the table */
1857
+ const char *input; /* zero-terminated subject string; its length is taken with strlen() */
1858
+ };
1859
+
1860
/* Marker object: only its address is used. A test case whose pattern[1] points here
   is run once, on pattern[0], with the newline convention set to PCRE2_NEWLINE_CR
   (see invalid_utf8_regression_tests()). */
+ static const char invalid_utf8_newline_cr;
1860
+
1861
/* Invalid UTF-8 test table. The columns follow the field order of
   struct invalid_utf8_regression_test_case: compile_options, jit_compile_options,
   start_offset, skip_left, skip_right, match_start, match_end, { pattern[0], pattern[1] }, input.
   A match_start of -1 means that the match is expected to return -1.
   The table ends with a row whose pattern[0] is NULL. */
+ static const struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
1862
/* "." tried at offset 0 of various byte sequences. */
+ { UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1863
+ { UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" },
1864
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
1865
+ { UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1866
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
1867
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
1868
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
1869
+ { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" },
1870
+ { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" },
1871
+ { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" },
1872
+ { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" },
1873
+ { UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" },
1874
+ { UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" },
1875
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" },
1876
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" },
1877
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
1878
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" },
1879
+ { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" },
1880
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" },
1881
+ { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" },
1882
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" },
1883
+ { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" },
1884
+ { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" },
1885
+ { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" },
1886
+ { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" },
1887
+ { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" },
1888
+ { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" },
1889
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" },
1890
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" },
1891
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" },
1892
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" },
1893
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" },
1894
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" },
1895
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" },
1896
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" },
1897
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" },
1898
+ { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" },
1899
+ { UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
1900
+
1901
/* "\B" (and, where given, "\b") tried at start offset 4. */
+ { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
1902
+ { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80\xf4\xa0\x80\x80" },
1903
+ { UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" },
1904
+ { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
1905
+ { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
1906
+ { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
1907
+ { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
1908
+ { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" },
1909
+ { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" },
1910
+ { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" },
1911
+ { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" },
1912
+ { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" },
1913
+ { UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" },
1914
+ { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" },
1915
+ { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" },
1916
+ { UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" },
1917
+ { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" },
1918
+ { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" },
1919
+ { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
1920
+ { UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
1921
+ { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
1922
+ { UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
1923
+ { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" },
1924
+ { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" },
1925
+ { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" },
1926
+
1927
/* Same patterns at start offset 3. */
+ { UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
1928
+ { UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
1929
+ { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf\xe0\x9f\xbf" },
1930
+ { UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" },
1931
+ { UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" },
1932
+ { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" },
1933
+ { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" },
1934
+ { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" },
1935
+
1936
/* Same patterns at start offset 2. */
+ { UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
1937
+ { UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
1938
+ { UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" },
1939
+ { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" },
1940
+ { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" },
1941
+ { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" },
1942
+ { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" },
1943
+
1944
/* Same patterns at start offset 1. */
+ { UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
1945
+ { UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
1946
+ { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" },
1947
+ { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" },
1948
+
1949
/* Caseless back reference. */
+ { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
1950
+ { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
1951
+ { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1952
+ { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1953
+ { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
1954
+ { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1955
+ { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1956
+ { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1957
+ { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1958
+
1959
/* "\X" tried at offset 0. */
+ { UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" },
1960
+ { UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" },
1961
+ { UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" },
1962
+ { UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" },
1963
+ { UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" },
1964
+ { UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" },
1965
+ { UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
1966
+ { UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1967
+ { UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1968
+
1969
/* Negated character class. */
+ { UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" },
1970
+ { UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" },
1971
+ { UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" },
1972
+ { UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" },
1973
+
1974
/* "^\W" with PCRE2_MULTILINE, searched from start offset 1. */
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"},
1975
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
1976
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"},
1977
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"},
1978
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"},
1979
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"},
1980
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"},
1981
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"},
1982
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"},
1983
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"},
1984
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"},
1985
+
1986
/* "#" with PCRE2_FIRSTLINE. */
+ { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"},
1987
+ { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"},
1988
+ { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"},
1989
+ { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"},
1990
+ { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"},
1991
+ { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"},
1992
+
1993
/* Unanchored literal pattern, with and without PCRE2_NO_START_OPTIMIZE. */
+ { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1994
+ { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1995
+ { PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1996
+ { PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1997
+
1998
/* Character type escapes. */
+ { PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" },
1999
+ { PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "[\\D]", NULL }, "\xe0\xab\xaa@" },
2000
+ { PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "\\D+", NULL }, "n\xc3\xb1" },
2001
+ { PCRE2_UTF, CI, 0, 0, 0, 0, 5, { "\\W+", NULL }, "@\xf0\x9d\x84\x9e" },
2002
+
2003
+ /* These two are not invalid UTF tests, but this infrastructure fits better for them. */
2004
+ { 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
2005
+ { 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" },
2006
+
2007
/* pattern[1] is the marker here: this case is run with the CR newline convention. */
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 0, 0, 0, -1, -1, { "^.a", &invalid_utf8_newline_cr }, "\xc3\xa7#a" },
2008
+
2009
/* End-of-table marker (pattern[0] == NULL). */
+ { 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
2010
+ };
2012
+
2013
+ #undef UDA
2014
+ #undef CI
2015
+ #undef CPI
2016
+
2017
+ static int run_invalid_utf8_test(const struct invalid_utf8_regression_test_case *current,
2018
+ int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata)
2019
+ {
2020
+ pcre2_code_8 *code;
2021
+ int result, errorcode;
2022
+ PCRE2_SIZE length, erroroffset;
2023
+ PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata);
2024
+
2025
+ if (current->pattern[i] == NULL)
2026
+ return 1;
2027
+
2028
+ code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
2029
+ current->compile_options, &errorcode, &erroroffset, ccontext);
2030
+
2031
+ if (!code) {
2032
+ printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
2033
+ return 0;
2034
+ }
2035
+
2036
+ if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
2037
+ printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
2038
+ pcre2_code_free_8(code);
2039
+ return 0;
2040
+ }
2041
+
2042
+ length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);
2043
+
2044
+ if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
2045
+ result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
2046
+ length, current->start_offset - current->skip_left, 0, mdata, NULL);
2047
+
2048
+ if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
2049
+ pcre2_code_free_8(code);
2050
+ return 0;
2051
+ }
2052
+ }
2053
+
2054
+ if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
2055
+ result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
2056
+ length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
2057
+
2058
+ if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
2059
+ pcre2_code_free_8(code);
2060
+ return 0;
2061
+ }
2062
+ }
2063
+
2064
+ pcre2_code_free_8(code);
2065
+ return 1;
2066
+ }
2067
+
2068
+ static int invalid_utf8_regression_tests(void)
2069
+ {
2070
+ const struct invalid_utf8_regression_test_case *current;
2071
+ pcre2_compile_context_8 *ccontext;
2072
+ pcre2_match_data_8 *mdata;
2073
+ int total = 0, successful = 0;
2074
+ int result;
2075
+
2076
+ printf("\nRunning invalid-utf8 JIT regression tests\n");
2077
+
2078
+ ccontext = pcre2_compile_context_create_8(NULL);
2079
+ pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
2080
+ mdata = pcre2_match_data_create_8(4, NULL);
2081
+
2082
+ for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
2083
+ /* printf("\nPattern: %s :\n", current->pattern); */
2084
+ total++;
2085
+
2086
+ result = 1;
2087
+ if (current->pattern[1] != &invalid_utf8_newline_cr)
2088
+ {
2089
+ if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2090
+ result = 0;
2091
+ if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
2092
+ result = 0;
2093
+ } else {
2094
+ pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_CR);
2095
+ if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2096
+ result = 0;
2097
+ pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
2098
+ }
2099
+
2100
+ if (result) {
2101
+ successful++;
2102
+ }
2103
+
2104
+ printf(".");
2105
+ if ((total % 60) == 0)
2106
+ printf("\n");
2107
+ }
2108
+
2109
+ if ((total % 60) != 0)
2110
+ printf("\n");
2111
+
2112
+ pcre2_match_data_free_8(mdata);
2113
+ pcre2_compile_context_free_8(ccontext);
2114
+
2115
+ if (total == successful) {
2116
+ printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
2117
+ return 0;
2118
+ } else {
2119
+ printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2120
+ return 1;
2121
+ }
2122
+ }
2123
+
2124
+ #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
2125
+
2126
/* Stub for builds without SUPPORT_UNICODE or without SUPPORT_PCRE2_8: no test is run and 0 is returned. */
+ static int invalid_utf8_regression_tests(void)
2127
+ {
2128
+ return 0;
2129
+ }
2130
+
2131
+ #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
2132
+
2133
+ #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
2134
+
2135
/* Shorthands for the invalid UTF-16 test table that follows.
   UDA is a set of compile options; CI and CPI are JIT compile options, and CPI
   additionally requests PCRE2_JIT_PARTIAL_SOFT. */
+ #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
2136
+ #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
2137
+ #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
2138
+
2139
/* One row of the invalid UTF-16 JIT regression table. It has the same fields as
   struct invalid_utf8_regression_test_case, except that the patterns and the
   input are strings of PCRE2_UCHAR16 code units.
   NOTE(review): the code which consumes this struct is not in the reviewed part of the file;
   the field meanings are presumed to match the 8-bit variant, with offsets and
   skip counts in 16-bit code units - confirm against the 16-bit test runner. */
+ struct invalid_utf16_regression_test_case {
2140
+ uint32_t compile_options;
2141
+ int jit_compile_options;
2142
+ int start_offset;
2143
+ int skip_left;
2144
+ int skip_right;
2145
+ int match_start;
2146
+ int match_end;
2147
+ const PCRE2_UCHAR16 *pattern[2];
2148
+ const PCRE2_UCHAR16 *input;
2149
+ };
2150
+
2151
/* Zero-terminated strings of 16-bit code units for the invalid UTF-16 test table.
   They are spelled out as arrays, one code unit per element. The first group is used
   as patterns in the table rows visible below (generic16 is presumably a pattern as
   well - its use is not in the reviewed part of the file). */
+ static PCRE2_UCHAR16 allany16[] = { '.', 0 }; /* "." */
2152
+ static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 }; /* "\B" */
2153
+ static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 }; /* "\b" */
2154
+ static PCRE2_UCHAR16 backreference16[] = { '(', '.', ')', '\\', '1', 0 }; /* "(.)\1" */
2155
+ static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 }; /* "\X" */
2156
+ static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 }; /* "[^#]" */
2157
+ static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 }; /* "^\W" */
2158
+ static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };
2159
/* The test16_N arrays are the subject strings referenced by the table rows. */
+ static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
2160
+ static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 };
2161
+ static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 };
2162
+ static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 };
2163
+ static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 };
2164
+ static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
2165
+ static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
2166
+ static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
2167
+ static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 };
2168
+ static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 };
2169
+ static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2170
+ static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2171
+
2172
+ static const struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = { /* Columns (struct is declared above this excerpt; order presumably mirrors the UTF-32 struct in this file - TODO confirm): compile options, JIT options, start_offset, skip_left, skip_right, match_start, match_end, { pattern[0], pattern[1] }, input */
2173
+ { UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 },
2174
+ { UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 },
2175
+ { UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
2176
+ { UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
2177
+ { UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
2178
+ { UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 }, /* skip_right 3 leaves only the first high surrogate in the subject; -1/-1 presumably means "no match expected" (decided by check_invalid_utf_result, defined elsewhere) */
2179
+ { UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 }, /* start_offset 1 points at the low surrogate of the first pair */
2180
+ { UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
2181
+ { UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 },
2182
+ { UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },
2183
+
2184
+ { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
2185
+ { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 },
2186
+ { UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 },
2187
+ { UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
2188
+ { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
2189
+ { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
2190
+ { UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 }, /* two patterns: both are run against the same expected result */
2191
+ { UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
2192
+ { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
2193
+ { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },
2194
+
2195
+ { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 },
2196
+ { UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 },
2197
+ { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 },
2198
+ { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 },
2199
+
2200
+ { UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 },
2201
+ { UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 },
2202
+ { UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 },
2203
+ { UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 },
2204
+ { UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 },
2205
+ { UDA, CPI, 1, 0, 0, -1, -1, { grapheme16, NULL }, test16_7 },
2206
+
2207
+ { UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
2208
+ { UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 },
2209
+ { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
2210
+
2211
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 }, /* not anchored: the match is expected after U+2028 */
2212
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, test16_10 },
2213
+
2214
+ { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 }, /* same two cases are run with and without PCRE2_NO_START_OPTIMIZE */
2215
+ { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
2216
+ { PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
2217
+ { PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
2218
+
2219
+ { 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL } /* terminator: the driver loop stops at the first entry whose pattern[0] is NULL */
2220
+ };
2221
+
2222
+ #undef UDA /* retire the short option aliases used by the table entries */
2223
+ #undef CI
2224
+ #undef CPI
2225
+
2226
+ static int run_invalid_utf16_test(const struct invalid_utf16_regression_test_case *current,
2227
+ int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata)
2228
+ {
2229
+ pcre2_code_16 *code;
2230
+ int result, errorcode;
2231
+ PCRE2_SIZE length, erroroffset;
2232
+ const PCRE2_UCHAR16 *input;
2233
+ PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata);
2234
+
2235
+ if (current->pattern[i] == NULL)
2236
+ return 1;
2237
+
2238
+ code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
2239
+ current->compile_options, &errorcode, &erroroffset, ccontext);
2240
+
2241
+ if (!code) {
2242
+ printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
2243
+ return 0;
2244
+ }
2245
+
2246
+ if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
2247
+ printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
2248
+ pcre2_code_free_16(code);
2249
+ return 0;
2250
+ }
2251
+
2252
+ input = current->input;
2253
+ length = 0;
2254
+
2255
+ while (*input++ != 0)
2256
+ length++;
2257
+
2258
+ length -= current->skip_left + current->skip_right;
2259
+
2260
+ if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
2261
+ result = pcre2_jit_match_16(code, (current->input + current->skip_left),
2262
+ length, current->start_offset - current->skip_left, 0, mdata, NULL);
2263
+
2264
+ if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
2265
+ pcre2_code_free_16(code);
2266
+ return 0;
2267
+ }
2268
+ }
2269
+
2270
+ if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
2271
+ result = pcre2_jit_match_16(code, (current->input + current->skip_left),
2272
+ length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
2273
+
2274
+ if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
2275
+ pcre2_code_free_16(code);
2276
+ return 0;
2277
+ }
2278
+ }
2279
+
2280
+ pcre2_code_free_16(code);
2281
+ return 1;
2282
+ }
2283
+
2284
+ static int invalid_utf16_regression_tests(void)
2285
+ {
2286
+ const struct invalid_utf16_regression_test_case *current;
2287
+ pcre2_compile_context_16 *ccontext;
2288
+ pcre2_match_data_16 *mdata;
2289
+ int total = 0, successful = 0;
2290
+ int result;
2291
+
2292
+ printf("\nRunning invalid-utf16 JIT regression tests\n");
2293
+
2294
+ ccontext = pcre2_compile_context_create_16(NULL);
2295
+ pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY);
2296
+ mdata = pcre2_match_data_create_16(4, NULL);
2297
+
2298
+ for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
2299
+ /* printf("\nPattern: %s :\n", current->pattern); */
2300
+ total++;
2301
+
2302
+ result = 1;
2303
+ if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
2304
+ result = 0;
2305
+ if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
2306
+ result = 0;
2307
+
2308
+ if (result) {
2309
+ successful++;
2310
+ }
2311
+
2312
+ printf(".");
2313
+ if ((total % 60) == 0)
2314
+ printf("\n");
2315
+ }
2316
+
2317
+ if ((total % 60) != 0)
2318
+ printf("\n");
2319
+
2320
+ pcre2_match_data_free_16(mdata);
2321
+ pcre2_compile_context_free_16(ccontext);
2322
+
2323
+ if (total == successful) {
2324
+ printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
2325
+ return 0;
2326
+ } else {
2327
+ printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2328
+ return 1;
2329
+ }
2330
+ }
2331
+
2332
+ #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
2333
+
2334
/*
 * Fallback used when Unicode or the 16-bit library is not compiled in:
 * there is nothing to run, so no failure (0) is reported.
 */
static int invalid_utf16_regression_tests(void)
{
	const int no_failures = 0;

	return no_failures;
}
2338
+
2339
+ #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
2340
+
2341
+ #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32
2342
+
2343
+ #define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED) /* compile options: UTF + DOTALL + ANCHORED */
2344
+ #define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF) /* JIT options: complete matching, invalid UTF allowed */
2345
+ #define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF) /* JIT options: complete and soft partial matching, invalid UTF allowed */
2346
+
2347
+ struct invalid_utf32_regression_test_case {
2348
+ uint32_t compile_options;
2349
+ int jit_compile_options;
2350
+ int start_offset;
2351
+ int skip_left;
2352
+ int skip_right;
2353
+ int match_start;
2354
+ int match_end;
2355
+ const PCRE2_UCHAR32 *pattern[2];
2356
+ const PCRE2_UCHAR32 *input;
2357
+ };
2358
+
2359
+ static PCRE2_UCHAR32 allany32[] = { '.', 0 }; /* pattern: . */
2359
+ static PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 }; /* pattern: \B */
2360
+ static PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 }; /* pattern: \b */
2361
+ static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 }; /* pattern: (.)\1 */
2362
+ static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 }; /* pattern: \X */
2363
+ static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 }; /* pattern: [^#] */
2364
+ static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 }; /* pattern: ^\W */
2365
+ static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 }; /* input: 0x10ffff is the last Unicode code point, 0x110000 is out of range */
2366
+ static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 }; /* input: 0xd800 / 0xdfff are surrogate values; 0xd7ff and 0xe000 border that range */
2367
+ static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 }; /* input: 'a', 'A', then an out-of-range value */
2368
+ static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 }; /* input: '#', the last code point, then an out-of-range value */
2369
+ static PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 }; /* input: space, U+2028, '#' */
2370
+ static PCRE2_UCHAR32 test32_6[] = { ' ', 0x110000, 0x2028, '#', 0 }; /* input: like test32_5 with an out-of-range value before U+2028 */
2372
+
2373
+ static const struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = { /* Columns follow the struct member order: compile_options, jit_compile_options, start_offset, skip_left, skip_right, match_start, match_end, { pattern[0], pattern[1] }, input */
2373
+ { UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 },
2374
+ { UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 }, /* start_offset 2 points at 0x110000; -1/-1 presumably means "no match expected" (decided by check_invalid_utf_result, defined elsewhere) */
2375
+ { UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_2 },
2376
+ { UDA, CI, 1, 0, 0, 1, 2, { allany32, NULL }, test32_2 },
2377
+ { UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_2 }, /* start_offset 2 points at 0xd800 */
2378
+ { UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 }, /* start_offset 3 points at 0xdfff */
2379
+
2380
+ { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
2381
+ { UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 }, /* two patterns: both are run against the same expected result */
2382
+ { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 },
2383
+ { UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
2384
+ { UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
2385
+
2386
+ { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 },
2387
+ { UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 },
2388
+
2389
+ { UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 },
2390
+ { UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 },
2391
+ { UDA, CPI, 1, 0, 0, 1, 2, { grapheme32, NULL }, test32_2 },
2392
+ { UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
2393
+ { UDA, CPI, 3, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
2394
+ { UDA, CPI, 4, 0, 0, 4, 5, { grapheme32, NULL }, test32_2 },
2395
+
2396
+ { UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
2397
+ { UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_4 },
2398
+ { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
2399
+ { UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_2 },
2400
+ { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_2 },
2401
+
2402
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_5 }, /* not anchored: the match is expected after U+2028 */
2403
+ { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { afternl32, NULL }, test32_6 },
2404
+
2405
+ { 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL } /* terminator: the driver loop stops at the first entry whose pattern[0] is NULL */
2406
+ };
2407
+
2408
+ #undef UDA /* retire the short option aliases used by the table entries */
2409
+ #undef CI
2410
+ #undef CPI
2412
+
2413
+ static int run_invalid_utf32_test(const struct invalid_utf32_regression_test_case *current,
2414
+ int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata)
2415
+ {
2416
+ pcre2_code_32 *code;
2417
+ int result, errorcode;
2418
+ PCRE2_SIZE length, erroroffset;
2419
+ const PCRE2_UCHAR32 *input;
2420
+ PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata);
2421
+
2422
+ if (current->pattern[i] == NULL)
2423
+ return 1;
2424
+
2425
+ code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED,
2426
+ current->compile_options, &errorcode, &erroroffset, ccontext);
2427
+
2428
+ if (!code) {
2429
+ printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
2430
+ return 0;
2431
+ }
2432
+
2433
+ if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) {
2434
+ printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
2435
+ pcre2_code_free_32(code);
2436
+ return 0;
2437
+ }
2438
+
2439
+ input = current->input;
2440
+ length = 0;
2441
+
2442
+ while (*input++ != 0)
2443
+ length++;
2444
+
2445
+ length -= current->skip_left + current->skip_right;
2446
+
2447
+ if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
2448
+ result = pcre2_jit_match_32(code, (current->input + current->skip_left),
2449
+ length, current->start_offset - current->skip_left, 0, mdata, NULL);
2450
+
2451
+ if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
2452
+ pcre2_code_free_32(code);
2453
+ return 0;
2454
+ }
2455
+ }
2456
+
2457
+ if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
2458
+ result = pcre2_jit_match_32(code, (current->input + current->skip_left),
2459
+ length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
2460
+
2461
+ if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
2462
+ pcre2_code_free_32(code);
2463
+ return 0;
2464
+ }
2465
+ }
2466
+
2467
+ pcre2_code_free_32(code);
2468
+ return 1;
2469
+ }
2470
+
2471
+ static int invalid_utf32_regression_tests(void)
2472
+ {
2473
+ const struct invalid_utf32_regression_test_case *current;
2474
+ pcre2_compile_context_32 *ccontext;
2475
+ pcre2_match_data_32 *mdata;
2476
+ int total = 0, successful = 0;
2477
+ int result;
2478
+
2479
+ printf("\nRunning invalid-utf32 JIT regression tests\n");
2480
+
2481
+ ccontext = pcre2_compile_context_create_32(NULL);
2482
+ pcre2_set_newline_32(ccontext, PCRE2_NEWLINE_ANY);
2483
+ mdata = pcre2_match_data_create_32(4, NULL);
2484
+
2485
+ for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) {
2486
+ /* printf("\nPattern: %s :\n", current->pattern); */
2487
+ total++;
2488
+
2489
+ result = 1;
2490
+ if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata))
2491
+ result = 0;
2492
+ if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata))
2493
+ result = 0;
2494
+
2495
+ if (result) {
2496
+ successful++;
2497
+ }
2498
+
2499
+ printf(".");
2500
+ if ((total % 60) == 0)
2501
+ printf("\n");
2502
+ }
2503
+
2504
+ if ((total % 60) != 0)
2505
+ printf("\n");
2506
+
2507
+ pcre2_match_data_free_32(mdata);
2508
+ pcre2_compile_context_free_32(ccontext);
2509
+
2510
+ if (total == successful) {
2511
+ printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n");
2512
+ return 0;
2513
+ } else {
2514
+ printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2515
+ return 1;
2516
+ }
2517
+ }
2518
+
2519
+ #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */
2520
+
2521
/*
 * Fallback used when Unicode or the 32-bit library is not compiled in:
 * there is nothing to run, so no failure (0) is reported.
 */
static int invalid_utf32_regression_tests(void)
{
	const int no_failures = 0;

	return no_failures;
}
2525
+
2526
+ #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */
2527
+
2528
+ /* End of pcre2_jit_test.c */