tpy-lang 0.3.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (333) hide show
  1. tpy_lang-0.3.0.dev0.dist-info/METADATA +151 -0
  2. tpy_lang-0.3.0.dev0.dist-info/RECORD +333 -0
  3. tpy_lang-0.3.0.dev0.dist-info/WHEEL +4 -0
  4. tpy_lang-0.3.0.dev0.dist-info/entry_points.txt +3 -0
  5. tpyc/__init__.py +104 -0
  6. tpyc/__main__.py +6 -0
  7. tpyc/_buildinfo.py +1 -0
  8. tpyc/_data/docs/LANGUAGE_FEATURES.md +6278 -0
  9. tpyc/_data/docs/STDLIB_ROADMAP.md +1258 -0
  10. tpyc/_data/docs/TPY_FOR_AGENTS.md +556 -0
  11. tpyc/_data/lib/tpy/_bindings/__init__.py +6 -0
  12. tpyc/_data/lib/tpy/_bindings/pcre2.py +173 -0
  13. tpyc/_data/lib/tpy/_bindings/posix_socket.py +161 -0
  14. tpyc/_data/lib/tpy/_functools_macros.py +80 -0
  15. tpyc/_data/lib/tpy/_macro_helpers.py +161 -0
  16. tpyc/_data/lib/tpy/argparse.py +2062 -0
  17. tpyc/_data/lib/tpy/asyncio/__init__.py +744 -0
  18. tpyc/_data/lib/tpy/asyncio/_executor.py +515 -0
  19. tpyc/_data/lib/tpy/base64.py +410 -0
  20. tpyc/_data/lib/tpy/bisect.py +39 -0
  21. tpyc/_data/lib/tpy/builtins.py +38 -0
  22. tpyc/_data/lib/tpy/dataclasses.py +354 -0
  23. tpyc/_data/lib/tpy/enum.py +23 -0
  24. tpyc/_data/lib/tpy/functools.py +33 -0
  25. tpyc/_data/lib/tpy/hashlib.py +206 -0
  26. tpyc/_data/lib/tpy/heapq.py +118 -0
  27. tpyc/_data/lib/tpy/io.py +395 -0
  28. tpyc/_data/lib/tpy/json.py +221 -0
  29. tpyc/_data/lib/tpy/math.py +406 -0
  30. tpyc/_data/lib/tpy/random.py +597 -0
  31. tpyc/_data/lib/tpy/re.py +467 -0
  32. tpyc/_data/lib/tpy/socket.py +379 -0
  33. tpyc/_data/lib/tpy/struct.py +178 -0
  34. tpyc/_data/lib/tpy/sys.py +40 -0
  35. tpyc/_data/lib/tpy/time.py +39 -0
  36. tpyc/_data/lib/tpy/tpy/__init__.py +78 -0
  37. tpyc/_data/lib/tpy/tpy/_bootstrap/__init__.py +10 -0
  38. tpyc/_data/lib/tpy/tpy/_bootstrap/_decorators.py +37 -0
  39. tpyc/_data/lib/tpy/tpy/_bootstrap/_extern.py +64 -0
  40. tpyc/_data/lib/tpy/tpy/_builtins/__init__.py +11 -0
  41. tpyc/_data/lib/tpy/tpy/_builtins/_bytes.py +378 -0
  42. tpyc/_data/lib/tpy/tpy/_builtins/_dict.py +151 -0
  43. tpyc/_data/lib/tpy/tpy/_builtins/_exceptions.py +125 -0
  44. tpyc/_data/lib/tpy/tpy/_builtins/_funcs.py +681 -0
  45. tpyc/_data/lib/tpy/tpy/_builtins/_io.py +97 -0
  46. tpyc/_data/lib/tpy/tpy/_builtins/_list.py +127 -0
  47. tpyc/_data/lib/tpy/tpy/_builtins/_range.py +52 -0
  48. tpyc/_data/lib/tpy/tpy/_builtins/_set.py +139 -0
  49. tpyc/_data/lib/tpy/tpy/_builtins/_super.py +11 -0
  50. tpyc/_data/lib/tpy/tpy/_builtins/_types.py +661 -0
  51. tpyc/_data/lib/tpy/tpy/_core/__init__.py +23 -0
  52. tpyc/_data/lib/tpy/tpy/_core/_bytes_view.py +129 -0
  53. tpyc/_data/lib/tpy/tpy/_core/_containers.py +137 -0
  54. tpyc/_data/lib/tpy/tpy/_core/_functions.py +40 -0
  55. tpyc/_data/lib/tpy/tpy/_core/_types.py +2061 -0
  56. tpyc/_data/lib/tpy/tpy/_typing/__init__.py +77 -0
  57. tpyc/_data/lib/tpy/tpy/_version.py +29 -0
  58. tpyc/_data/lib/tpy/tpy/bits.py +28 -0
  59. tpyc/_data/lib/tpy/tpy/coro/__init__.py +127 -0
  60. tpyc/_data/lib/tpy/tpy/extern.py +8 -0
  61. tpyc/_data/lib/tpy/tpy/mem.py +49 -0
  62. tpyc/_data/lib/tpy/tpy/unsafe.py +195 -0
  63. tpyc/_data/lib/tpy/tpy/version.py +21 -0
  64. tpyc/_data/lib/tpy/typing.py +13 -0
  65. tpyc/_data/runtime/cpp/include/tpy/any.hpp +461 -0
  66. tpyc/_data/runtime/cpp/include/tpy/as_ostream.hpp +117 -0
  67. tpyc/_data/runtime/cpp/include/tpy/async.hpp +76 -0
  68. tpyc/_data/runtime/cpp/include/tpy/bigint.hpp +1343 -0
  69. tpyc/_data/runtime/cpp/include/tpy/builtins.hpp +400 -0
  70. tpyc/_data/runtime/cpp/include/tpy/bytes_ops.hpp +469 -0
  71. tpyc/_data/runtime/cpp/include/tpy/container_ops.hpp +487 -0
  72. tpyc/_data/runtime/cpp/include/tpy/copy_iter.hpp +82 -0
  73. tpyc/_data/runtime/cpp/include/tpy/core.hpp +558 -0
  74. tpyc/_data/runtime/cpp/include/tpy/dict_ops.hpp +289 -0
  75. tpyc/_data/runtime/cpp/include/tpy/dunder.hpp +750 -0
  76. tpyc/_data/runtime/cpp/include/tpy/dynamic.hpp +44 -0
  77. tpyc/_data/runtime/cpp/include/tpy/enum.hpp +40 -0
  78. tpyc/_data/runtime/cpp/include/tpy/file.hpp +245 -0
  79. tpyc/_data/runtime/cpp/include/tpy/fixed_int.hpp +317 -0
  80. tpyc/_data/runtime/cpp/include/tpy/format.hpp +954 -0
  81. tpyc/_data/runtime/cpp/include/tpy/frame_slot.hpp +120 -0
  82. tpyc/_data/runtime/cpp/include/tpy/generator.hpp +47 -0
  83. tpyc/_data/runtime/cpp/include/tpy/iterable_ops.hpp +122 -0
  84. tpyc/_data/runtime/cpp/include/tpy/itertools.hpp +749 -0
  85. tpyc/_data/runtime/cpp/include/tpy/next_iter.hpp +82 -0
  86. tpyc/_data/runtime/cpp/include/tpy/ordered_map.hpp +518 -0
  87. tpyc/_data/runtime/cpp/include/tpy/ordered_set.hpp +337 -0
  88. tpyc/_data/runtime/cpp/include/tpy/own_iter.hpp +54 -0
  89. tpyc/_data/runtime/cpp/include/tpy/pascal_graph_sdl.hpp +192 -0
  90. tpyc/_data/runtime/cpp/include/tpy/printing.hpp +302 -0
  91. tpyc/_data/runtime/cpp/include/tpy/protocols.hpp +61 -0
  92. tpyc/_data/runtime/cpp/include/tpy/range.hpp +115 -0
  93. tpyc/_data/runtime/cpp/include/tpy/ranges.hpp +212 -0
  94. tpyc/_data/runtime/cpp/include/tpy/set_ops.hpp +265 -0
  95. tpyc/_data/runtime/cpp/include/tpy/slice.hpp +47 -0
  96. tpyc/_data/runtime/cpp/include/tpy/span_iter.hpp +42 -0
  97. tpyc/_data/runtime/cpp/include/tpy/stdlib/math.hpp +41 -0
  98. tpyc/_data/runtime/cpp/include/tpy/stdlib/pcre2_h.hpp +96 -0
  99. tpyc/_data/runtime/cpp/include/tpy/stdlib/random.hpp +25 -0
  100. tpyc/_data/runtime/cpp/include/tpy/stdlib/socket_h.hpp +145 -0
  101. tpyc/_data/runtime/cpp/include/tpy/stdlib/time.hpp +62 -0
  102. tpyc/_data/runtime/cpp/include/tpy/system.hpp +121 -0
  103. tpyc/_data/runtime/cpp/include/tpy/throwable.hpp +55 -0
  104. tpyc/_data/runtime/cpp/include/tpy/tpy.hpp +156 -0
  105. tpyc/_data/runtime/cpp/include/tpy/type_name.hpp +77 -0
  106. tpyc/_data/runtime/cpp/include/tpy/type_traits.hpp +240 -0
  107. tpyc/_data/runtime/cpp/include/tpy/uninit_array_storage.hpp +250 -0
  108. tpyc/_data/runtime/cpp/include/tpy/uninit_heap_storage.hpp +277 -0
  109. tpyc/_data/runtime/cpp/include/tpy/varargs.hpp +174 -0
  110. tpyc/_data/runtime/cpp/include/tpy/variant_ref.hpp +118 -0
  111. tpyc/_data/runtime/cpp/src/stdlib/socket_impl.cpp +104 -0
  112. tpyc/_data/runtime/cpp/third_party/README.md +58 -0
  113. tpyc/_data/runtime/cpp/third_party/pcre2/AUTHORS +36 -0
  114. tpyc/_data/runtime/cpp/third_party/pcre2/CMakeLists.txt +1233 -0
  115. tpyc/_data/runtime/cpp/third_party/pcre2/COPYING +5 -0
  116. tpyc/_data/runtime/cpp/third_party/pcre2/ChangeLog +3097 -0
  117. tpyc/_data/runtime/cpp/third_party/pcre2/HACKING +853 -0
  118. tpyc/_data/runtime/cpp/third_party/pcre2/INSTALL +368 -0
  119. tpyc/_data/runtime/cpp/third_party/pcre2/LICENCE +94 -0
  120. tpyc/_data/runtime/cpp/third_party/pcre2/NEWS +492 -0
  121. tpyc/_data/runtime/cpp/third_party/pcre2/NON-AUTOTOOLS-BUILD +430 -0
  122. tpyc/_data/runtime/cpp/third_party/pcre2/README +956 -0
  123. tpyc/_data/runtime/cpp/third_party/pcre2/cmake/COPYING-CMAKE-SCRIPTS +22 -0
  124. tpyc/_data/runtime/cpp/third_party/pcre2/cmake/FindEditline.cmake +16 -0
  125. tpyc/_data/runtime/cpp/third_party/pcre2/cmake/FindPackageHandleStandardArgs.cmake +58 -0
  126. tpyc/_data/runtime/cpp/third_party/pcre2/cmake/FindReadline.cmake +29 -0
  127. tpyc/_data/runtime/cpp/third_party/pcre2/cmake/pcre2-config-version.cmake.in +15 -0
  128. tpyc/_data/runtime/cpp/third_party/pcre2/cmake/pcre2-config.cmake.in +148 -0
  129. tpyc/_data/runtime/cpp/third_party/pcre2/config-cmake.h.in +56 -0
  130. tpyc/_data/runtime/cpp/third_party/pcre2/libpcre2-16.pc.in +13 -0
  131. tpyc/_data/runtime/cpp/third_party/pcre2/libpcre2-32.pc.in +13 -0
  132. tpyc/_data/runtime/cpp/third_party/pcre2/libpcre2-8.pc.in +13 -0
  133. tpyc/_data/runtime/cpp/third_party/pcre2/libpcre2-posix.pc.in +13 -0
  134. tpyc/_data/runtime/cpp/third_party/pcre2/pcre2-config.in +121 -0
  135. tpyc/_data/runtime/cpp/third_party/pcre2/src/config.h +483 -0
  136. tpyc/_data/runtime/cpp/third_party/pcre2/src/config.h.generic +483 -0
  137. tpyc/_data/runtime/cpp/third_party/pcre2/src/config.h.in +460 -0
  138. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2.h +1010 -0
  139. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2.h.generic +1010 -0
  140. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2.h.in +1010 -0
  141. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_auto_possess.c +1371 -0
  142. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_chartables.c +196 -0
  143. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_chartables.c.dist +196 -0
  144. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_chkdint.c +96 -0
  145. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_compile.c +11001 -0
  146. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_config.c +252 -0
  147. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_context.c +510 -0
  148. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_convert.c +1189 -0
  149. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_dfa_match.c +4119 -0
  150. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_dftables.c +297 -0
  151. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_error.c +345 -0
  152. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_extuni.c +162 -0
  153. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_find_bracket.c +219 -0
  154. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_fuzzsupport.c +792 -0
  155. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_internal.h +2084 -0
  156. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_intmodedep.h +940 -0
  157. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_compile.c +14972 -0
  158. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_match.c +200 -0
  159. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_misc.c +234 -0
  160. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_neon_inc.h +354 -0
  161. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_simd_inc.h +2355 -0
  162. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_jit_test.c +2528 -0
  163. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_maketables.c +165 -0
  164. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_match.c +7777 -0
  165. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_match_data.c +185 -0
  166. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_newline.c +243 -0
  167. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_ord2utf.c +120 -0
  168. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_pattern_info.c +432 -0
  169. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_printint.c +886 -0
  170. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_script_run.c +344 -0
  171. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_serialize.c +286 -0
  172. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_string_utils.c +237 -0
  173. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_study.c +1915 -0
  174. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_substitute.c +1009 -0
  175. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_substring.c +550 -0
  176. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_tables.c +234 -0
  177. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_ucd.c +5460 -0
  178. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_ucp.h +396 -0
  179. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_ucptables.c +1533 -0
  180. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_valid_utf.c +398 -0
  181. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2_xclass.c +308 -0
  182. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2demo.c +497 -0
  183. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2grep.c +4606 -0
  184. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2posix.c +425 -0
  185. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2posix.h +187 -0
  186. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2posix_test.c +209 -0
  187. tpyc/_data/runtime/cpp/third_party/pcre2/src/pcre2test.c +9708 -0
  188. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitExecAllocatorApple.c +137 -0
  189. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitExecAllocatorCore.c +327 -0
  190. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c +89 -0
  191. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitExecAllocatorPosix.c +62 -0
  192. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitExecAllocatorWindows.c +40 -0
  193. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c +72 -0
  194. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorPosix.c +172 -0
  195. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorPosix.c +141 -0
  196. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorWindows.c +102 -0
  197. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitConfig.h +142 -0
  198. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitConfigCPU.h +188 -0
  199. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitConfigInternal.h +907 -0
  200. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitLir.c +3561 -0
  201. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitLir.h +2466 -0
  202. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeARM_32.c +4636 -0
  203. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeARM_64.c +3491 -0
  204. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeARM_T2_32.c +4302 -0
  205. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeLOONGARCH_64.c +3765 -0
  206. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeMIPS_32.c +472 -0
  207. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeMIPS_64.c +387 -0
  208. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeMIPS_common.c +4259 -0
  209. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativePPC_32.c +485 -0
  210. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativePPC_64.c +719 -0
  211. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativePPC_common.c +3161 -0
  212. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeRISCV_32.c +142 -0
  213. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeRISCV_64.c +222 -0
  214. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeRISCV_common.c +3121 -0
  215. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeS390X.c +4526 -0
  216. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeX86_32.c +1685 -0
  217. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeX86_64.c +1398 -0
  218. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitNativeX86_common.c +5001 -0
  219. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitSerialize.c +516 -0
  220. tpyc/_data/runtime/cpp/third_party/pcre2/src/sljit/sljitUtils.c +344 -0
  221. tpyc/_data/runtime/cpp/third_party/pcre2.sources.txt +54 -0
  222. tpyc/_data/runtime/cpp/third_party/pcre2.vendor.json +7 -0
  223. tpyc/build/__init__.py +7 -0
  224. tpyc/build/pcre2.py +122 -0
  225. tpyc/build/third_party.py +413 -0
  226. tpyc/cli.py +822 -0
  227. tpyc/codegen_cpp/__init__.py +18 -0
  228. tpyc/codegen_cpp/builtins.py +484 -0
  229. tpyc/codegen_cpp/context.py +2064 -0
  230. tpyc/codegen_cpp/expressions.py +5940 -0
  231. tpyc/codegen_cpp/functions.py +1913 -0
  232. tpyc/codegen_cpp/gen_async.py +3258 -0
  233. tpyc/codegen_cpp/gen_generators.py +657 -0
  234. tpyc/codegen_cpp/generator.py +2258 -0
  235. tpyc/codegen_cpp/match.py +1997 -0
  236. tpyc/codegen_cpp/param_const.py +172 -0
  237. tpyc/codegen_cpp/protocols.py +907 -0
  238. tpyc/codegen_cpp/records.py +1654 -0
  239. tpyc/codegen_cpp/resumable_cfg.py +1651 -0
  240. tpyc/codegen_cpp/statements.py +4963 -0
  241. tpyc/codegen_cpp/string_dispatch.py +76 -0
  242. tpyc/codegen_cpp/test_context.py +46 -0
  243. tpyc/codegen_cpp/test_param_const.py +113 -0
  244. tpyc/codegen_cpp/test_resumable_cfg.py +182 -0
  245. tpyc/codegen_cpp/type_resolution.py +53 -0
  246. tpyc/codegen_cpp/types.py +436 -0
  247. tpyc/codegen_cpp/variant_access.py +135 -0
  248. tpyc/coercions.py +749 -0
  249. tpyc/compilation_context.py +57 -0
  250. tpyc/compiler.py +3945 -0
  251. tpyc/cycle_detection.py +358 -0
  252. tpyc/diagnostics.py +135 -0
  253. tpyc/dump_types.py +353 -0
  254. tpyc/frontend_diagnostics.py +47 -0
  255. tpyc/frontend_ir/__init__.py +140 -0
  256. tpyc/frontend_ir/lower.py +1098 -0
  257. tpyc/frontend_ir/nodes.py +718 -0
  258. tpyc/frontend_ir/resolver_adapter.py +151 -0
  259. tpyc/frontend_plugin.py +209 -0
  260. tpyc/install_docs.py +81 -0
  261. tpyc/liveness.py +756 -0
  262. tpyc/macro_api.py +1724 -0
  263. tpyc/macro_loader.py +497 -0
  264. tpyc/module_names.py +64 -0
  265. tpyc/modules/__init__.py +31 -0
  266. tpyc/modules/defs.py +89 -0
  267. tpyc/modules/registry.py +36 -0
  268. tpyc/modules/resolver.py +192 -0
  269. tpyc/modules/type_resolution.py +629 -0
  270. tpyc/namespace.py +172 -0
  271. tpyc/parse/__init__.py +84 -0
  272. tpyc/parse/imports.py +490 -0
  273. tpyc/parse/nodes.py +1732 -0
  274. tpyc/parse/parser.py +4043 -0
  275. tpyc/parse/resolve_refs.py +466 -0
  276. tpyc/parse/type_resolver.py +1060 -0
  277. tpyc/prescan.py +254 -0
  278. tpyc/qnames.py +149 -0
  279. tpyc/repl.py +529 -0
  280. tpyc/repl_backends.py +848 -0
  281. tpyc/sema/__init__.py +21 -0
  282. tpyc/sema/analyzer.py +3625 -0
  283. tpyc/sema/bound_check.py +72 -0
  284. tpyc/sema/builder_trace.py +684 -0
  285. tpyc/sema/calls.py +5406 -0
  286. tpyc/sema/compatibility.py +2107 -0
  287. tpyc/sema/context.py +1243 -0
  288. tpyc/sema/expressions.py +3737 -0
  289. tpyc/sema/flow_facts.py +199 -0
  290. tpyc/sema/init_tracker.py +150 -0
  291. tpyc/sema/list_literals.py +69 -0
  292. tpyc/sema/literal_utils.py +27 -0
  293. tpyc/sema/local_deduction.py +1088 -0
  294. tpyc/sema/macros.py +179 -0
  295. tpyc/sema/match.py +1177 -0
  296. tpyc/sema/method_expansion.py +347 -0
  297. tpyc/sema/methods.py +2197 -0
  298. tpyc/sema/mutation_propagation.py +268 -0
  299. tpyc/sema/narrowing.py +857 -0
  300. tpyc/sema/numeric_lattice.py +160 -0
  301. tpyc/sema/operators.py +402 -0
  302. tpyc/sema/overloads.py +841 -0
  303. tpyc/sema/protocols.py +1209 -0
  304. tpyc/sema/reach_analysis.py +202 -0
  305. tpyc/sema/registration.py +3156 -0
  306. tpyc/sema/scope_tracker.py +193 -0
  307. tpyc/sema/statements.py +4426 -0
  308. tpyc/sema/type_ops.py +1879 -0
  309. tpyc/sema/value_range.py +181 -0
  310. tpyc/symbol_binding.py +259 -0
  311. tpyc/test_c3_mro.py +208 -0
  312. tpyc/test_cli_argv.py +52 -0
  313. tpyc/test_compiler.py +559 -0
  314. tpyc/test_contains_type_param.py +101 -0
  315. tpyc/test_cycle_detection.py +221 -0
  316. tpyc/test_dump_types.py +225 -0
  317. tpyc/test_install_docs.py +65 -0
  318. tpyc/test_local_cpp_form.py +135 -0
  319. tpyc/test_macro_loader.py +76 -0
  320. tpyc/test_method_expansion.py +254 -0
  321. tpyc/test_nominal_identity.py +182 -0
  322. tpyc/test_overloads.py +410 -0
  323. tpyc/test_parse.py +303 -0
  324. tpyc/test_parse_type_ref.py +506 -0
  325. tpyc/test_parse_version_info.py +58 -0
  326. tpyc/test_reach_analysis.py +72 -0
  327. tpyc/test_ref_type.py +216 -0
  328. tpyc/test_send_sync_substitution.py +276 -0
  329. tpyc/test_tuple_mutation_propagation.py +206 -0
  330. tpyc/test_type_def_registry.py +1729 -0
  331. tpyc/test_union_types.py +195 -0
  332. tpyc/type_def_registry.py +975 -0
  333. tpyc/typesys.py +5104 -0
@@ -0,0 +1,2355 @@
1
+ /*************************************************
2
+ * Perl-Compatible Regular Expressions *
3
+ *************************************************/
4
+
5
+ /* PCRE is a library of functions to support regular expressions whose syntax
6
+ and semantics are as close as possible to those of the Perl 5 language.
7
+
8
+ Written by Philip Hazel
9
+ This module by Zoltan Herczeg
10
+ Original API code Copyright (c) 1997-2012 University of Cambridge
11
+ New API code Copyright (c) 2016-2019 University of Cambridge
12
+
13
+ -----------------------------------------------------------------------------
14
+ Redistribution and use in source and binary forms, with or without
15
+ modification, are permitted provided that the following conditions are met:
16
+
17
+ * Redistributions of source code must retain the above copyright notice,
18
+ this list of conditions and the following disclaimer.
19
+
20
+ * Redistributions in binary form must reproduce the above copyright
21
+ notice, this list of conditions and the following disclaimer in the
22
+ documentation and/or other materials provided with the distribution.
23
+
24
+ * Neither the name of the University of Cambridge nor the names of its
25
+ contributors may be used to endorse or promote products derived from
26
+ this software without specific prior written permission.
27
+
28
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
+ POSSIBILITY OF SUCH DAMAGE.
39
+ -----------------------------------------------------------------------------
40
+ */
41
+
42
+ #if !(defined SUPPORT_VALGRIND)
43
+
44
+ #if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
45
+ || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \
46
+ || (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64))
47
+
48
+ typedef enum { /* Selects which compare sequence the SIMD fast-forward emitters in this file generate. */
49
+ vector_compare_match1, /* initial value in fast_forward_char_simd; kept when char1 == char2 */
50
+ vector_compare_match1i, /* set when char1 != char2 and is_powerof2(char1 ^ char2) holds */
51
+ vector_compare_match2, /* set when char1 != char2 and is_powerof2(char1 ^ char2) does not hold */
52
+ } vector_compare_type;
53
+
54
+ #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
55
+ static SLJIT_INLINE sljit_s32 max_fast_forward_char_pair_offset(void)
56
+ { /* x86 variant: returns a constant selected at preprocessing time by PCRE2_CODE_UNIT_WIDTH (15, 7 or 3). NOTE(review): each value equals the number of code units in a 128-bit vector minus one, and the disabled AVX2 values match 256 bits - presumed derivation, confirm against callers. */
57
+ #if PCRE2_CODE_UNIT_WIDTH == 8
58
+ /* The AVX2 code path is currently disabled. */
59
+ /* return sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 31 : 15; */
60
+ return 15; /* 8-bit code units */
61
+ #elif PCRE2_CODE_UNIT_WIDTH == 16
62
+ /* The AVX2 code path is currently disabled. */
63
+ /* return sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 15 : 7; */
64
+ return 7; /* 16-bit code units */
65
+ #elif PCRE2_CODE_UNIT_WIDTH == 32
66
+ /* The AVX2 code path is currently disabled. */
67
+ /* return sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? 7 : 3; */
68
+ return 3; /* 32-bit code units */
69
+ #else
70
+ #error "Unsupported unit width"
71
+ #endif
72
+ }
73
+ #else /* !SLJIT_CONFIG_X86 */
74
+ static SLJIT_INLINE sljit_s32 max_fast_forward_char_pair_offset(void)
75
+ { /* Non-x86 variant (compiled when SLJIT_CONFIG_X86 is not set): returns the same constants as the x86 variant currently does, selected by PCRE2_CODE_UNIT_WIDTH. */
76
+ #if PCRE2_CODE_UNIT_WIDTH == 8
77
+ return 15; /* 8-bit code units */
78
+ #elif PCRE2_CODE_UNIT_WIDTH == 16
79
+ return 7; /* 16-bit code units */
80
+ #elif PCRE2_CODE_UNIT_WIDTH == 32
81
+ return 3; /* 32-bit code units */
82
+ #else
83
+ #error "Unsupported unit width"
84
+ #endif
85
+ }
86
+ #endif /* SLJIT_CONFIG_X86 */
87
+
88
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
89
+ static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
90
+ { /* Emits a mask of reg followed by a compare, and returns the jump produced by CMP(SLJIT_NOT_EQUAL, ...). reg is both destination and source of the AND, so its previous value is not preserved. compiler is not named in the body; presumably the OP2/CMP macros reference it - confirm. */
91
+ #if PCRE2_CODE_UNIT_WIDTH == 8
92
+ OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0); /* keep only the top two bits of the byte */
93
+ return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80); /* jump unless the byte has the form 10xxxxxx (UTF-8 continuation byte) */
94
+ #elif PCRE2_CODE_UNIT_WIDTH == 16
95
+ OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00); /* keep only the top six bits of the 16-bit unit */
96
+ return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00); /* jump unless the unit lies in 0xdc00-0xdfff (UTF-16 low surrogate) */
97
+ #else
98
+ #error "Unknown code width"
99
+ #endif
100
+ }
101
+ #endif
102
+
103
+ #endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X || SLJIT_CONFIG_LOONGARCH_64 */
104
+
105
+ #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
106
+
107
+ static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
108
+ { /* Returns chr repeated to fill 32 bits: four copies for 8-bit units, two copies for 16-bit units, the value itself for 32-bit units. */
109
+ sljit_u32 value = chr; /* work in unsigned 32 bits so the shifts below operate on an unsigned value */
110
+ #if PCRE2_CODE_UNIT_WIDTH == 8
111
+ #define SIMD_COMPARE_TYPE_INDEX 0 /* preprocessor side effect: stays defined after this function; added to opcode byte 0x74 elsewhere in this file */
112
+ return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value);
113
+ #elif PCRE2_CODE_UNIT_WIDTH == 16
114
+ #define SIMD_COMPARE_TYPE_INDEX 1 /* same macro, value for 16-bit units */
115
+ return (sljit_s32)((value << 16) | value);
116
+ #elif PCRE2_CODE_UNIT_WIDTH == 32
117
+ #define SIMD_COMPARE_TYPE_INDEX 2 /* same macro, value for 32-bit units */
118
+ return (sljit_s32)(value);
119
+ #else
120
+ #error "Unsupported unit width"
121
+ #endif
122
+ }
123
+
124
+ static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
125
+ sljit_s32 reg_type, int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
126
+ { /* Emits at most one hand-encoded 4-byte SIMD instruction for one step (0..3) of a compare sequence; the callers in this file invoke it once per step with step = 0, 1, 2, 3. The *_ind arguments are register indexes placed directly into the encoding. */
127
+ sljit_u8 instruction[4]; /* [0..1] prefix bytes, [2] opcode, [3] ModRM */
128
+ /* Summary by compare_type: match1 emits only PCMPEQ dst,cmp1 (step 2); match1i emits POR dst,cmp2 (step 0) and PCMPEQ dst,cmp1 (step 2); match2 is handled by the switch further down. */
129
+ if (reg_type == SLJIT_SIMD_REG_128)
130
+ {
131
+ instruction[0] = 0x66; /* non-VEX form: 0x66 prefix ... */
132
+ instruction[1] = 0x0f; /* ... followed by the 0x0f opcode escape */
133
+ }
134
+ else
135
+ {
136
+ /* Two byte VEX prefix. */
137
+ instruction[0] = 0xc5;
138
+ instruction[1] = 0xfd; /* bits 3..6 are all ones here; the "^= (dst_ind << 3)" statements below flip them to name dst as an extra operand - NOTE(review): per the VEX two-byte prefix layout (inverted vvvv field), confirm */
139
+ }
140
+
141
+ SLJIT_ASSERT(step >= 0 && step <= 3);
142
+ /* Single-compare kinds (match1 / match1i): every path inside this if returns. */
143
+ if (compare_type != vector_compare_match2)
144
+ {
145
+ if (step == 0)
146
+ {
147
+ if (compare_type == vector_compare_match1i)
148
+ {
149
+ /* POR xmm1, xmm2/m128 */
150
+ if (reg_type == SLJIT_SIMD_REG_256)
151
+ instruction[1] ^= (dst_ind << 3);
152
+
153
+ /* Prefix is filled. */
154
+ instruction[2] = 0xeb;
155
+ instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind; /* ModRM with both top bits set (register-direct): reg field = dst, rm field = cmp2 */
156
+ sljit_emit_op_custom(compiler, instruction, 4);
157
+ }
158
+ return; /* match1 emits nothing at step 0 */
159
+ }
160
+
161
+ if (step != 2)
162
+ return; /* steps 1 and 3 emit nothing for these compare kinds */
163
+
164
+ /* PCMPEQB/W/D xmm1, xmm2/m128 */
165
+ if (reg_type == SLJIT_SIMD_REG_256)
166
+ instruction[1] ^= (dst_ind << 3);
167
+
168
+ /* Prefix is filled. */
169
+ instruction[2] = 0x74 + SIMD_COMPARE_TYPE_INDEX; /* 0x74, 0x75 or 0x76 depending on the code unit width macro */
170
+ instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; /* reg field = dst, rm field = cmp1 */
171
+ sljit_emit_op_custom(compiler, instruction, 4);
172
+ return;
173
+ }
174
+ /* From here on compare_type == vector_compare_match2. */
175
+ if (reg_type == SLJIT_SIMD_REG_256)
176
+ {
177
+ if (step == 2)
178
+ return; /* the 256-bit form emits nothing at step 2 */
179
+
180
+ if (step == 0)
181
+ {
182
+ step = 2; /* at step 0 run the case 2 compare (tmp against cmp2) instead of the case 0 register copy */
183
+ instruction[1] ^= (dst_ind << 3); /* dst goes into the prefix field, since case 2 itself does not modify the prefix */
184
+ }
185
+ }
186
+
187
+ switch (step)
188
+ {
189
+ case 0:
190
+ SLJIT_ASSERT(reg_type == SLJIT_SIMD_REG_128); /* the 256-bit path rewrote step 0 to 2 above, so only the 128-bit form reaches this case */
191
+
192
+ /* MOVDQA xmm1, xmm2/m128 */
193
+ /* Prefix is filled. */
194
+ instruction[2] = 0x6f;
195
+ instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind; /* reg field = tmp, rm field = dst: copies dst into tmp */
196
+ sljit_emit_op_custom(compiler, instruction, 4);
197
+ return;
198
+
199
+ case 1:
200
+ /* PCMPEQB/W/D xmm1, xmm2/m128 */
201
+ if (reg_type == SLJIT_SIMD_REG_256)
202
+ instruction[1] ^= (dst_ind << 3);
203
+
204
+ /* Prefix is filled. */
205
+ instruction[2] = 0x74 + SIMD_COMPARE_TYPE_INDEX;
206
+ instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; /* reg field = dst, rm field = cmp1 */
207
+ sljit_emit_op_custom(compiler, instruction, 4);
208
+ return;
209
+
210
+ case 2:
211
+ /* PCMPEQB/W/D xmm1, xmm2/m128 */
212
+ /* Prefix is filled. */
213
+ instruction[2] = 0x74 + SIMD_COMPARE_TYPE_INDEX;
214
+ instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind; /* reg field = tmp, rm field = cmp2 */
215
+ sljit_emit_op_custom(compiler, instruction, 4);
216
+ return;
217
+
218
+ case 3:
219
+ /* POR xmm1, xmm2/m128 */
220
+ if (reg_type == SLJIT_SIMD_REG_256)
221
+ instruction[1] ^= (dst_ind << 3);
222
+
223
+ /* Prefix is filled. */
224
+ instruction[2] = 0xeb;
225
+ instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind; /* reg field = dst, rm field = tmp: ORs the two compare results into dst */
226
+ sljit_emit_op_custom(compiler, instruction, 4);
227
+ return;
228
+ }
229
+ }
230
+
231
+ #define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
232
+
233
+ static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
233
+ { /* Emits JIT code that advances STR_PTR through the subject in aligned vector-sized blocks until a block contains a unit matching char1 or char2, then adds the index of the lowest set match bit. TMP1 and TMP2 are used as scratch. */
234
+ DEFINE_COMPILER;
235
+ sljit_u8 instruction[8]; /* only the first 3 bytes are written in this function */
236
+ /* The AVX2 code path is currently disabled. */
237
+ /* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */
238
+ sljit_s32 reg_type = SLJIT_SIMD_REG_128; /* fixed, so every "reg_type == SLJIT_SIMD_REG_256" test below selects its second alternative */
239
+ sljit_s32 value;
240
+ struct sljit_label *start;
241
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
242
+ struct sljit_label *restart;
243
+ #endif
244
+ struct sljit_jump *quit;
245
+ struct sljit_jump *partial_quit[2]; /* the two "STR_PTR >= STR_END" exits: [0] before the first block, [1] inside the loop */
246
+ vector_compare_type compare_type = vector_compare_match1;
247
+ sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
248
+ sljit_s32 data_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0);
249
+ sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1);
250
+ sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR2);
251
+ sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR3);
252
+ sljit_u32 bit = 0;
253
+ int i;
254
+
255
+ SLJIT_UNUSED_ARG(offset); /* offset is read only in the UTF block at the end, which is compiled out unless SUPPORT_UNICODE is defined and the unit width is not 32 */
256
+ /* Choose the compare kind: equal chars keep match1; chars whose XOR passes is_powerof2 use match1i with that XOR kept in bit; all other pairs use match2 with bit reset to 0. */
257
+ if (char1 != char2)
258
+ {
259
+ bit = char1 ^ char2;
260
+ compare_type = vector_compare_match1i;
261
+
262
+ if (!is_powerof2(bit))
263
+ {
264
+ bit = 0;
265
+ compare_type = vector_compare_match2;
266
+ }
267
+ }
268
+
269
+ partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
270
+ if (common->mode == PCRE2_JIT_COMPLETE)
271
+ add_jump(compiler, &common->failed_match, partial_quit[0]); /* in complete mode this exit is added to the failed_match list */
272
+
273
+ /* First part (unaligned start) */
274
+ value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
275
+ sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); /* FR1 pattern: char1 with the differing bit set (bit is 0 unless match1i) */
276
+
277
+ if (char1 != char2)
278
+ sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); /* FR2 pattern: the bit mask for match1i, otherwise char2 */
279
+
280
+ OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); /* TMP2 = copy of the unaligned start pointer */
281
+
282
+ sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR1, SLJIT_FR1, 0);
283
+
284
+ if (char1 != char2)
285
+ sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR2, SLJIT_FR2, 0);
286
+
287
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
288
+ restart = LABEL(); /* target of the jump emitted by the UTF block at the end of this function */
289
+ #endif
290
+
291
+ value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf;
292
+ OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~value); /* clear the low bits: STR_PTR becomes the start of its aligned block */
293
+ OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value); /* TMP2 = byte offset of the original pointer inside that block */
294
+
295
+ value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
296
+ sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0);
297
+ /* Emit the four compare steps on the block now held in FR0. */
298
+ for (i = 0; i < 4; i++)
299
+ fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
300
+
301
+ sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0); /* presumably collects one sign bit per byte of FR0 into TMP1 - confirm against the sljit API */
302
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); /* restore the unaligned pointer */
303
+ OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); /* shift out the mask bits belonging to bytes before the original pointer */
304
+
305
+ quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); /* leave if any mask bit remains */
306
+
307
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); /* back to the aligned block start before entering the loop */
308
+
309
+ /* Second part (aligned) */
310
+ start = LABEL();
311
+
312
+ value = (reg_type == SLJIT_SIMD_REG_256) ? 32 : 16;
313
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value); /* advance by one vector width in bytes */
314
+
315
+ partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
316
+ if (common->mode == PCRE2_JIT_COMPLETE)
317
+ add_jump(compiler, &common->failed_match, partial_quit[1]);
318
+
319
+ value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
320
+ sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0);
321
+ for (i = 0; i < 4; i++)
322
+ fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
323
+
324
+ sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0);
325
+ CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); /* loop back to start while the mask in TMP1 is zero */
326
+
327
+ JUMPHERE(quit); /* the first-part exit joins here with a non-zero mask in TMP1 */
328
+
329
+ SLJIT_ASSERT(tmp1_reg_ind < 8); /* the 3-byte encoding below has no prefix byte, so the index must fit in the 3-bit ModRM fields */
330
+ /* BSF r32, r/m32 */
331
+ instruction[0] = 0x0f;
332
+ instruction[1] = 0xbc;
333
+ instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; /* TMP1 is both destination and source: TMP1 = index of its lowest set bit */
334
+ sljit_emit_op_custom(compiler, instruction, 3);
335
+
336
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); /* add the bit index found above to STR_PTR */
337
+
338
+ if (common->mode != PCRE2_JIT_COMPLETE)
339
+ {
340
+ JUMPHERE(partial_quit[0]);
341
+ JUMPHERE(partial_quit[1]);
342
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); /* sets the GREATER flag from STR_PTR compared with STR_END */
343
+ SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); /* presumably clamps STR_PTR to STR_END when it is greater - confirm SELECT operand order */
344
+ }
345
+ else
346
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); /* complete mode: a position at or past STR_END is a failed match */
347
+
348
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
349
+ if (common->utf && offset > 0)
350
+ {
351
+ SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
352
+
353
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); /* load the code unit located offset units before STR_PTR */
354
+
355
+ quit = jump_if_utf_char_start(compiler, TMP1); /* accept the position if that unit starts a character */
356
+
357
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* otherwise step one code unit forward ... */
358
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
359
+ OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); /* ... reload TMP2 as the code at restart expects ... */
360
+ JUMPTO(SLJIT_JUMP, restart); /* ... and search again from the new position */
361
+
362
+ JUMPHERE(quit);
363
+ }
364
+ #endif
365
+ }
367
+
368
+ #define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
369
+
370
+ static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
371
+ {
372
+ DEFINE_COMPILER;
373
+ sljit_u8 instruction[8];
374
+ /* The AVX2 code path is currently disabled. */
375
+ /* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */
376
+ sljit_s32 reg_type = SLJIT_SIMD_REG_128;
377
+ sljit_s32 value;
378
+ struct sljit_label *start;
379
+ struct sljit_jump *quit;
380
+ jump_list *not_found = NULL;
381
+ vector_compare_type compare_type = vector_compare_match1;
382
+ sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
383
+ sljit_s32 data_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0);
384
+ sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1);
385
+ sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR2);
386
+ sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR3);
387
+ sljit_u32 bit = 0;
388
+ int i;
389
+
390
+ if (char1 != char2)
391
+ {
392
+ bit = char1 ^ char2;
393
+ compare_type = vector_compare_match1i;
394
+
395
+ if (!is_powerof2(bit))
396
+ {
397
+ bit = 0;
398
+ compare_type = vector_compare_match2;
399
+ }
400
+ }
401
+
402
+ add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
403
+ OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
404
+ OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
405
+
406
+ /* First part (unaligned start) */
407
+
408
+ value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
409
+ sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
410
+
411
+ if (char1 != char2)
412
+ sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
413
+
414
+ OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
415
+
416
+ sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR1, SLJIT_FR1, 0);
417
+
418
+ if (char1 != char2)
419
+ sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR2, SLJIT_FR2, 0);
420
+
421
+ value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf;
422
+ OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~value);
423
+ OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value);
424
+
425
+ value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
426
+ sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0);
427
+
428
+ for (i = 0; i < 4; i++)
429
+ fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
430
+
431
+ sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0);
432
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
433
+ OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
434
+
435
+ quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
436
+
437
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
438
+
439
+ /* Second part (aligned) */
440
+ start = LABEL();
441
+
442
+ value = (reg_type == SLJIT_SIMD_REG_256) ? 32 : 16;
443
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
444
+
445
+ add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
446
+
447
+ value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
448
+ sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0);
449
+
450
+ for (i = 0; i < 4; i++)
451
+ fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
452
+
453
+ sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0);
454
+ CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
455
+
456
+ JUMPHERE(quit);
457
+
458
+ SLJIT_ASSERT(tmp1_reg_ind < 8);
459
+ /* BSF r32, r/m32 */
460
+ instruction[0] = 0x0f;
461
+ instruction[1] = 0xbc;
462
+ instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
463
+ sljit_emit_op_custom(compiler, instruction, 3);
464
+
465
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
466
+ add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
467
+
468
+ OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
469
+ return not_found;
470
+ }
471
+
472
+ #ifndef _WIN64
473
+
474
+ #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SIMD))
475
+
476
+ static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
477
+ PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
478
+ {
479
+ DEFINE_COMPILER;
480
+ sljit_u8 instruction[8];
481
+ /* The AVX2 code path is currently disabled. */
482
+ /* sljit_s32 reg_type = sljit_has_cpu_feature(SLJIT_HAS_AVX2) ? SLJIT_SIMD_REG_256 : SLJIT_SIMD_REG_128; */
483
+ sljit_s32 reg_type = SLJIT_SIMD_REG_128;
484
+ sljit_s32 value;
485
+ vector_compare_type compare1_type = vector_compare_match1;
486
+ vector_compare_type compare2_type = vector_compare_match1;
487
+ sljit_u32 bit1 = 0;
488
+ sljit_u32 bit2 = 0;
489
+ sljit_u32 diff = IN_UCHARS(offs1 - offs2);
490
+ sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
491
+ sljit_s32 data1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0);
492
+ sljit_s32 data2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1);
493
+ sljit_s32 cmp1a_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR2);
494
+ sljit_s32 cmp2a_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR3);
495
+ sljit_s32 cmp1b_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR4);
496
+ sljit_s32 cmp2b_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR5);
497
+ sljit_s32 tmp1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR6);
498
+ sljit_s32 tmp2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_TMP_FR0);
499
+ struct sljit_label *start;
500
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
501
+ struct sljit_label *restart;
502
+ #endif
503
+ struct sljit_jump *jump[2];
504
+ int i;
505
+
506
+ SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2 && offs2 >= 0);
507
+ SLJIT_ASSERT(diff <= (unsigned)IN_UCHARS(max_fast_forward_char_pair_offset()));
508
+
509
+ /* Initialize. */
510
+ if (common->match_end_ptr != 0)
511
+ {
512
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
513
+ OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
514
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
515
+
516
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
517
+ SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
518
+ }
519
+
520
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
521
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
522
+
523
+ if (char1a == char1b)
524
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
525
+ else
526
+ {
527
+ bit1 = char1a ^ char1b;
528
+ if (is_powerof2(bit1))
529
+ {
530
+ compare1_type = vector_compare_match1i;
531
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
532
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
533
+ }
534
+ else
535
+ {
536
+ compare1_type = vector_compare_match2;
537
+ bit1 = 0;
538
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
539
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
540
+ }
541
+ }
542
+
543
+ value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
544
+ sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR2, 0, TMP1, 0);
545
+
546
+ if (char1a != char1b)
547
+ sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR4, 0, TMP2, 0);
548
+
549
+ if (char2a == char2b)
550
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
551
+ else
552
+ {
553
+ bit2 = char2a ^ char2b;
554
+ if (is_powerof2(bit2))
555
+ {
556
+ compare2_type = vector_compare_match1i;
557
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
558
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
559
+ }
560
+ else
561
+ {
562
+ compare2_type = vector_compare_match2;
563
+ bit2 = 0;
564
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
565
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
566
+ }
567
+ }
568
+
569
+ sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR3, 0, TMP1, 0);
570
+
571
+ if (char2a != char2b)
572
+ sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR5, 0, TMP2, 0);
573
+
574
+ sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR2, SLJIT_FR2, 0);
575
+ if (char1a != char1b)
576
+ sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR4, SLJIT_FR4, 0);
577
+
578
+ sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR3, SLJIT_FR3, 0);
579
+ if (char2a != char2b)
580
+ sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR5, SLJIT_FR5, 0);
581
+
582
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
583
+ restart = LABEL();
584
+ #endif
585
+
586
+ OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
587
+ OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
588
+ value = (reg_type == SLJIT_SIMD_REG_256) ? ~0x1f : ~0xf;
589
+ OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
590
+
591
+ value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
592
+ sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0);
593
+
594
+ jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);
595
+
596
+ sljit_emit_simd_mov(compiler, reg_type, SLJIT_FR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff);
597
+ jump[1] = JUMP(SLJIT_JUMP);
598
+
599
+ JUMPHERE(jump[0]);
600
+
601
+ if (reg_type == SLJIT_SIMD_REG_256)
602
+ {
603
+ if (diff != 16)
604
+ {
605
+ /* PSLLDQ ymm1, ymm2, imm8 */
606
+ instruction[0] = 0xc5;
607
+ instruction[1] = (sljit_u8)(0xf9 ^ (data2_ind << 3));
608
+ instruction[2] = 0x73;
609
+ instruction[3] = 0xc0 | (7 << 3) | data1_ind;
610
+ instruction[4] = diff & 0xf;
611
+ sljit_emit_op_custom(compiler, instruction, 5);
612
+ }
613
+
614
+ instruction[0] = 0xc4;
615
+ instruction[1] = 0xe3;
616
+ if (diff < 16)
617
+ {
618
+ /* VINSERTI128 xmm1, xmm2, xmm3/m128 */
619
+ /* instruction[0] = 0xc4; */
620
+ /* instruction[1] = 0xe3; */
621
+ instruction[2] = (sljit_u8)(0x7d ^ (data2_ind << 3));
622
+ instruction[3] = 0x38;
623
+ SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR) <= 7);
624
+ instruction[4] = 0x40 | (data2_ind << 3) | sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
625
+ instruction[5] = (sljit_u8)(16 - diff);
626
+ instruction[6] = 1;
627
+ sljit_emit_op_custom(compiler, instruction, 7);
628
+ }
629
+ else
630
+ {
631
+ /* VPERM2I128 xmm1, xmm2, xmm3/m128 */
632
+ /* instruction[0] = 0xc4; */
633
+ /* instruction[1] = 0xe3; */
634
+ value = (diff == 16) ? data1_ind : data2_ind;
635
+ instruction[2] = (sljit_u8)(0x7d ^ (value << 3));
636
+ instruction[3] = 0x46;
637
+ instruction[4] = 0xc0 | (data2_ind << 3) | value;
638
+ instruction[5] = 0x08;
639
+ sljit_emit_op_custom(compiler, instruction, 6);
640
+ }
641
+ }
642
+ else
643
+ {
644
+ /* MOVDQA xmm1, xmm2/m128 */
645
+ instruction[0] = 0x66;
646
+ instruction[1] = 0x0f;
647
+ instruction[2] = 0x6f;
648
+ instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
649
+ sljit_emit_op_custom(compiler, instruction, 4);
650
+
651
+ /* PSLLDQ xmm1, imm8 */
652
+ /* instruction[0] = 0x66; */
653
+ /* instruction[1] = 0x0f; */
654
+ instruction[2] = 0x73;
655
+ instruction[3] = 0xc0 | (7 << 3) | data2_ind;
656
+ instruction[4] = diff;
657
+ sljit_emit_op_custom(compiler, instruction, 5);
658
+ }
659
+
660
+ JUMPHERE(jump[1]);
661
+
662
+ value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf;
663
+ OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value);
664
+
665
+ for (i = 0; i < 4; i++)
666
+ {
667
+ fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
668
+ fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
669
+ }
670
+
671
+ sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_FR0, SLJIT_FR0, SLJIT_FR1);
672
+ sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0);
673
+
674
+ /* Ignore matches before the first STR_PTR. */
675
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
676
+ OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
677
+
678
+ jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
679
+
680
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
681
+
682
+ /* Main loop. */
683
+ start = LABEL();
684
+
685
+ value = (reg_type == SLJIT_SIMD_REG_256) ? 32 : 16;
686
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
687
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
688
+
689
+ value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
690
+ sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0);
691
+ sljit_emit_simd_mov(compiler, reg_type, SLJIT_FR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff);
692
+
693
+ for (i = 0; i < 4; i++)
694
+ {
695
+ fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
696
+ fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
697
+ }
698
+
699
+ sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_FR0, SLJIT_FR0, SLJIT_FR1);
700
+ sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0);
701
+
702
+ CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
703
+
704
+ JUMPHERE(jump[0]);
705
+
706
+ SLJIT_ASSERT(tmp1_reg_ind < 8);
707
+ /* BSF r32, r/m32 */
708
+ instruction[0] = 0x0f;
709
+ instruction[1] = 0xbc;
710
+ instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
711
+ sljit_emit_op_custom(compiler, instruction, 3);
712
+
713
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
714
+
715
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
716
+
717
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
718
+ if (common->utf)
719
+ {
720
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
721
+
722
+ jump[0] = jump_if_utf_char_start(compiler, TMP1);
723
+
724
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
725
+ CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
726
+
727
+ add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));
728
+
729
+ JUMPHERE(jump[0]);
730
+ }
731
+ #endif
732
+
733
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
734
+
735
+ if (common->match_end_ptr != 0)
736
+ OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
737
+ }
738
+
739
+ #endif /* !_WIN64 */
740
+
741
+ #undef SIMD_COMPARE_TYPE_INDEX
742
+
743
+ #endif /* SLJIT_CONFIG_X86 */
744
+
745
+ #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))
746
+
747
+ #include <arm_neon.h>
748
+
749
/* Packs up to four 8-bit values into one unsigned int so that they can be
   handed to a helper function as a single immediate argument. */
typedef union {
  unsigned int x;
  struct {
    unsigned char c1;
    unsigned char c2;
    unsigned char c3;
    unsigned char c4;
  } c;
} int_char;
753
+
754
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
755
+ static SLJIT_INLINE int utf_continue(PCRE2_SPTR s)
756
+ {
757
+ #if PCRE2_CODE_UNIT_WIDTH == 8
758
+ return (*s & 0xc0) == 0x80;
759
+ #elif PCRE2_CODE_UNIT_WIDTH == 16
760
+ return (*s & 0xfc00) == 0xdc00;
761
+ #else
762
+ #error "Unknown code width"
763
+ #endif
764
+ }
765
+ #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
766
+
767
/* Width-specific NEON vocabulary.
 *
 *   VECTOR_FACTOR  number of code units held by one 128-bit vector
 *   vect_t         vector type with one lane per code unit
 *   VLD1Q .. VANDQ the matching load / compare / or / store / dup / ext / and
 *                  intrinsics
 *   quad_word      a 128-bit value viewed as lanes (mem) or as two 64-bit
 *                  halves (dw)
 */
#if PCRE2_CODE_UNIT_WIDTH == 8

# define VECTOR_FACTOR 16
# define vect_t uint8x16_t
# define VLD1Q(X) vld1q_u8((sljit_u8 *)(X))
# define VCEQQ vceqq_u8
# define VORRQ vorrq_u8
# define VST1Q vst1q_u8
# define VDUPQ vdupq_n_u8
# define VEXTQ vextq_u8
# define VANDQ vandq_u8
typedef union {
  uint8_t mem[16];
  uint64_t dw[2];
} quad_word;

#elif PCRE2_CODE_UNIT_WIDTH == 16

# define VECTOR_FACTOR 8
# define vect_t uint16x8_t
# define VLD1Q(X) vld1q_u16((sljit_u16 *)(X))
# define VCEQQ vceqq_u16
# define VORRQ vorrq_u16
# define VST1Q vst1q_u16
# define VDUPQ vdupq_n_u16
# define VEXTQ vextq_u16
# define VANDQ vandq_u16
typedef union {
  uint16_t mem[8];
  uint64_t dw[2];
} quad_word;

#else

# define VECTOR_FACTOR 4
# define vect_t uint32x4_t
# define VLD1Q(X) vld1q_u32((sljit_u32 *)(X))
# define VCEQQ vceqq_u32
# define VORRQ vorrq_u32
# define VST1Q vst1q_u32
# define VDUPQ vdupq_n_u32
# define VEXTQ vextq_u32
# define VANDQ vandq_u32
typedef union {
  uint32_t mem[4];
  uint64_t dw[2];
} quad_word;

#endif
810
+
811
+ #define FFCS
812
+ #include "pcre2_jit_neon_inc.h"
813
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
814
+ # define FF_UTF
815
+ # include "pcre2_jit_neon_inc.h"
816
+ # undef FF_UTF
817
+ #endif
818
+ #undef FFCS
819
+
820
+ #define FFCS_2
821
+ #include "pcre2_jit_neon_inc.h"
822
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
823
+ # define FF_UTF
824
+ # include "pcre2_jit_neon_inc.h"
825
+ # undef FF_UTF
826
+ #endif
827
+ #undef FFCS_2
828
+
829
+ #define FFCS_MASK
830
+ #include "pcre2_jit_neon_inc.h"
831
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
832
+ # define FF_UTF
833
+ # include "pcre2_jit_neon_inc.h"
834
+ # undef FF_UTF
835
+ #endif
836
+ #undef FFCS_MASK
837
+
838
+ #define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
839
+
840
+ static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
841
+ {
842
+ DEFINE_COMPILER;
843
+ int_char ic;
844
+ struct sljit_jump *partial_quit, *quit;
845
+ /* Save temporary registers. */
846
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
847
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP3, 0);
848
+
849
+ /* Prepare function arguments */
850
+ OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
851
+ GET_LOCAL_BASE(SLJIT_R1, 0, LOCALS0);
852
+ OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset);
853
+
854
+ if (char1 == char2)
855
+ {
856
+ ic.c.c1 = char1;
857
+ ic.c.c2 = char2;
858
+ OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
859
+
860
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
861
+ if (common->utf && offset > 0)
862
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
863
+ SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_utf));
864
+ else
865
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
866
+ SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs));
867
+ #else
868
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
869
+ SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs));
870
+ #endif
871
+ }
872
+ else
873
+ {
874
+ PCRE2_UCHAR mask = char1 ^ char2;
875
+ if (is_powerof2(mask))
876
+ {
877
+ ic.c.c1 = char1 | mask;
878
+ ic.c.c2 = mask;
879
+ OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
880
+
881
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
882
+ if (common->utf && offset > 0)
883
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
884
+ SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask_utf));
885
+ else
886
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
887
+ SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask));
888
+ #else
889
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
890
+ SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask));
891
+ #endif
892
+ }
893
+ else
894
+ {
895
+ ic.c.c1 = char1;
896
+ ic.c.c2 = char2;
897
+ OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
898
+
899
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
900
+ if (common->utf && offset > 0)
901
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
902
+ SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2_utf));
903
+ else
904
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
905
+ SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2));
906
+ #else
907
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
908
+ SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2));
909
+ #endif
910
+ }
911
+ }
912
+ /* Restore registers. */
913
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
914
+ OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
915
+
916
+ /* Check return value. */
917
+ partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
918
+ if (common->mode == PCRE2_JIT_COMPLETE)
919
+ add_jump(compiler, &common->failed_match, partial_quit);
920
+
921
+ /* Fast forward STR_PTR to the result of memchr. */
922
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
923
+ if (common->mode != PCRE2_JIT_COMPLETE)
924
+ {
925
+ quit = CMP(SLJIT_NOT_ZERO, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
926
+ JUMPHERE(partial_quit);
927
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
928
+ SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
929
+ JUMPHERE(quit);
930
+ }
931
+ }
932
+
933
/* Comparison strategies understood by fast_forward_char_pair_compare(). */
typedef enum {
  /* data == cmp1 */
  compare_match1,
  /* (data | cmp2) == cmp1 */
  compare_match1i,
  /* (data == cmp1) | (data == cmp2) */
  compare_match2,
} compare_type;
938
+
939
+ static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2)
940
+ {
941
+ if (ctype == compare_match2)
942
+ {
943
+ vect_t tmp = dst;
944
+ dst = VCEQQ(dst, cmp1);
945
+ tmp = VCEQQ(tmp, cmp2);
946
+ dst = VORRQ(dst, tmp);
947
+ return dst;
948
+ }
949
+
950
+ if (ctype == compare_match1i)
951
+ dst = VORRQ(dst, cmp2);
952
+ dst = VCEQQ(dst, cmp1);
953
+ return dst;
954
+ }
955
+
956
+ static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
957
+ {
958
+ #if PCRE2_CODE_UNIT_WIDTH == 8
959
+ return 15;
960
+ #elif PCRE2_CODE_UNIT_WIDTH == 16
961
+ return 7;
962
+ #elif PCRE2_CODE_UNIT_WIDTH == 32
963
+ return 3;
964
+ #else
965
+ #error "Unsupported unit width"
966
+ #endif
967
+ }
968
+
969
+ /* ARM doesn't have a shift left across lanes. */
970
+ static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n)
971
+ {
972
+ vect_t zero = VDUPQ(0);
973
+ SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR);
974
+ /* VEXTQ takes an immediate as last argument. */
975
+ #define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X);
976
+ switch (n)
977
+ {
978
+ C(1); C(2); C(3);
979
+ #if PCRE2_CODE_UNIT_WIDTH != 32
980
+ C(4); C(5); C(6); C(7);
981
+ # if PCRE2_CODE_UNIT_WIDTH != 16
982
+ C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15);
983
+ # endif
984
+ #endif
985
+ default:
986
+ /* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't
987
+ happen. The return is still here for compilers to not warn. */
988
+ return a;
989
+ }
990
+ }
991
+
992
+ #define FFCPS
993
+ #define FFCPS_DIFF1
994
+ #define FFCPS_CHAR1A2A
995
+
996
+ #define FFCPS_0
997
+ #include "pcre2_jit_neon_inc.h"
998
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
999
+ # define FF_UTF
1000
+ # include "pcre2_jit_neon_inc.h"
1001
+ # undef FF_UTF
1002
+ #endif
1003
+ #undef FFCPS_0
1004
+
1005
+ #undef FFCPS_CHAR1A2A
1006
+
1007
+ #define FFCPS_1
1008
+ #include "pcre2_jit_neon_inc.h"
1009
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1010
+ # define FF_UTF
1011
+ # include "pcre2_jit_neon_inc.h"
1012
+ # undef FF_UTF
1013
+ #endif
1014
+ #undef FFCPS_1
1015
+
1016
+ #undef FFCPS_DIFF1
1017
+
1018
+ #define FFCPS_DEFAULT
1019
+ #include "pcre2_jit_neon_inc.h"
1020
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1021
+ # define FF_UTF
1022
+ # include "pcre2_jit_neon_inc.h"
1023
+ # undef FF_UTF
1024
+ #endif
1025
+ #undef FFCPS
1026
+
1027
+ #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
1028
+
1029
+ static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
1030
+ PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
1031
+ {
1032
+ DEFINE_COMPILER;
1033
+ sljit_u32 diff = IN_UCHARS(offs1 - offs2);
1034
+ struct sljit_jump *partial_quit;
1035
+ int_char ic;
1036
+ SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
1037
+ SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
1038
+ SLJIT_ASSERT(compiler->scratches == 5);
1039
+
1040
+ /* Save temporary register STR_PTR. */
1041
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
1042
+
1043
+ /* Prepare arguments for the function call. */
1044
+ if (common->match_end_ptr == 0)
1045
+ OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
1046
+ else
1047
+ {
1048
+ OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
1049
+ OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
1050
+
1051
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, SLJIT_R0, 0);
1052
+ SELECT(SLJIT_LESS, SLJIT_R0, STR_END, 0, SLJIT_R0);
1053
+ }
1054
+
1055
+ GET_LOCAL_BASE(SLJIT_R1, 0, LOCALS0);
1056
+ OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1);
1057
+ OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2);
1058
+ ic.c.c1 = char1a;
1059
+ ic.c.c2 = char1b;
1060
+ ic.c.c3 = char2a;
1061
+ ic.c.c4 = char2b;
1062
+ OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, ic.x);
1063
+
1064
+ if (diff == 1) {
1065
+ if (char1a == char1b && char2a == char2b) {
1066
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1067
+ if (common->utf)
1068
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
1069
+ SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0_utf));
1070
+ else
1071
+ #endif
1072
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
1073
+ SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0));
1074
+ } else {
1075
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1076
+ if (common->utf)
1077
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
1078
+ SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1_utf));
1079
+ else
1080
+ #endif
1081
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
1082
+ SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1));
1083
+ }
1084
+ } else {
1085
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1086
+ if (common->utf)
1087
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
1088
+ SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default_utf));
1089
+ else
1090
+ #endif
1091
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
1092
+ SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default));
1093
+ }
1094
+
1095
+ /* Restore STR_PTR register. */
1096
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
1097
+
1098
+ /* Check return value. */
1099
+ partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
1100
+ add_jump(compiler, &common->failed_match, partial_quit);
1101
+
1102
+ /* Fast forward STR_PTR to the result of memchr. */
1103
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
1104
+
1105
+ JUMPHERE(partial_quit);
1106
+ }
1107
+
1108
+ #endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */
1109
+
1110
+ #if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
1111
+
1112
+ #if PCRE2_CODE_UNIT_WIDTH == 8
1113
+ #define VECTOR_ELEMENT_SIZE 0
1114
+ #elif PCRE2_CODE_UNIT_WIDTH == 16
1115
+ #define VECTOR_ELEMENT_SIZE 1
1116
+ #elif PCRE2_CODE_UNIT_WIDTH == 32
1117
+ #define VECTOR_ELEMENT_SIZE 2
1118
+ #else
1119
+ #error "Unsupported unit width"
1120
+ #endif
1121
+
1122
+ static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg,
1123
+ sljit_s32 base_reg, sljit_s32 index_reg)
1124
+ {
1125
+ sljit_u16 instruction[3];
1126
+
1127
+ instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg);
1128
+ instruction[1] = (sljit_u16)(base_reg << 12);
1129
+ instruction[2] = (sljit_u16)((0x8 << 8) | (vlbb ? 0x07 : 0x06));
1130
+
1131
+ sljit_emit_op_custom(compiler, instruction, 6);
1132
+ }
1133
+
1134
+ #if PCRE2_CODE_UNIT_WIDTH == 32
1135
+
1136
+ static void replicate_imm_vector(struct sljit_compiler *compiler, int step, sljit_s32 dst_vreg,
1137
+ PCRE2_UCHAR chr, sljit_s32 tmp_general_reg)
1138
+ {
1139
+ sljit_u16 instruction[3];
1140
+
1141
+ SLJIT_ASSERT(step >= 0 && step <= 1);
1142
+
1143
+ if (chr < 0x7fff)
1144
+ {
1145
+ if (step == 1)
1146
+ return;
1147
+
1148
+ /* VREPI */
1149
+ instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4));
1150
+ instruction[1] = (sljit_u16)chr;
1151
+ instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
1152
+ sljit_emit_op_custom(compiler, instruction, 6);
1153
+ return;
1154
+ }
1155
+
1156
+ if (step == 0)
1157
+ {
1158
+ OP1(SLJIT_MOV, tmp_general_reg, 0, SLJIT_IMM, chr);
1159
+
1160
+ /* VLVG */
1161
+ instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(SLJIT_GP_REGISTER, tmp_general_reg));
1162
+ instruction[1] = 0;
1163
+ instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22);
1164
+ sljit_emit_op_custom(compiler, instruction, 6);
1165
+ return;
1166
+ }
1167
+
1168
+ /* VREP */
1169
+ instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | dst_vreg);
1170
+ instruction[1] = 0;
1171
+ instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xc << 8) | 0x4d);
1172
+ sljit_emit_op_custom(compiler, instruction, 6);
1173
+ }
1174
+
1175
+ #endif
1176
+
1177
+ static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
1178
+ int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
1179
+ {
1180
+ sljit_u16 instruction[3];
1181
+
1182
+ SLJIT_ASSERT(step >= 0 && step <= 2);
1183
+
1184
+ if (step == 1)
1185
+ {
1186
+ /* VCEQ */
1187
+ instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
1188
+ instruction[1] = (sljit_u16)(cmp1_ind << 12);
1189
+ instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
1190
+ sljit_emit_op_custom(compiler, instruction, 6);
1191
+ return;
1192
+ }
1193
+
1194
+ if (compare_type != vector_compare_match2)
1195
+ {
1196
+ if (step == 0 && compare_type == vector_compare_match1i)
1197
+ {
1198
+ /* VO */
1199
+ instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
1200
+ instruction[1] = (sljit_u16)(cmp2_ind << 12);
1201
+ instruction[2] = (sljit_u16)((0xe << 8) | 0x6a);
1202
+ sljit_emit_op_custom(compiler, instruction, 6);
1203
+ }
1204
+ return;
1205
+ }
1206
+
1207
+ switch (step)
1208
+ {
1209
+ case 0:
1210
+ /* VCEQ */
1211
+ instruction[0] = (sljit_u16)(0xe700 | (tmp_ind << 4) | dst_ind);
1212
+ instruction[1] = (sljit_u16)(cmp2_ind << 12);
1213
+ instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8);
1214
+ sljit_emit_op_custom(compiler, instruction, 6);
1215
+ return;
1216
+
1217
+ case 2:
1218
+ /* VO */
1219
+ instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind);
1220
+ instruction[1] = (sljit_u16)(tmp_ind << 12);
1221
+ instruction[2] = (sljit_u16)((0xe << 8) | 0x6a);
1222
+ sljit_emit_op_custom(compiler, instruction, 6);
1223
+ return;
1224
+ }
1225
+ }
1226
+
1227
+ #define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
1228
+
1229
+ static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
1230
+ {
1231
+ DEFINE_COMPILER;
1232
+ sljit_u16 instruction[3];
1233
+ struct sljit_label *start;
1234
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1235
+ struct sljit_label *restart;
1236
+ #endif
1237
+ struct sljit_jump *quit;
1238
+ struct sljit_jump *partial_quit[2];
1239
+ vector_compare_type compare_type = vector_compare_match1;
1240
+ sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
1241
+ sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
1242
+ sljit_s32 data_ind = 0;
1243
+ sljit_s32 tmp_ind = 1;
1244
+ sljit_s32 cmp1_ind = 2;
1245
+ sljit_s32 cmp2_ind = 3;
1246
+ sljit_s32 zero_ind = 4;
1247
+ sljit_u32 bit = 0;
1248
+ int i;
1249
+
1250
+ SLJIT_UNUSED_ARG(offset);
1251
+
1252
+ if (char1 != char2)
1253
+ {
1254
+ bit = char1 ^ char2;
1255
+ compare_type = vector_compare_match1i;
1256
+
1257
+ if (!is_powerof2(bit))
1258
+ {
1259
+ bit = 0;
1260
+ compare_type = vector_compare_match2;
1261
+ }
1262
+ }
1263
+
1264
+ partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1265
+ if (common->mode == PCRE2_JIT_COMPLETE)
1266
+ add_jump(compiler, &common->failed_match, partial_quit[0]);
1267
+
1268
+ /* First part (unaligned start) */
1269
+
1270
+ OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);
1271
+
1272
+ #if PCRE2_CODE_UNIT_WIDTH != 32
1273
+
1274
+ /* VREPI */
1275
+ instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
1276
+ instruction[1] = (sljit_u16)(char1 | bit);
1277
+ instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
1278
+ sljit_emit_op_custom(compiler, instruction, 6);
1279
+
1280
+ if (char1 != char2)
1281
+ {
1282
+ /* VREPI */
1283
+ instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
1284
+ instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
1285
+ /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
1286
+ sljit_emit_op_custom(compiler, instruction, 6);
1287
+ }
1288
+
1289
+ #else /* PCRE2_CODE_UNIT_WIDTH == 32 */
1290
+
1291
+ for (int i = 0; i < 2; i++)
1292
+ {
1293
+ replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1);
1294
+
1295
+ if (char1 != char2)
1296
+ replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP1);
1297
+ }
1298
+
1299
+ #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
1300
+
1301
+ if (compare_type == vector_compare_match2)
1302
+ {
1303
+ /* VREPI */
1304
+ instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
1305
+ instruction[1] = 0;
1306
+ instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
1307
+ sljit_emit_op_custom(compiler, instruction, 6);
1308
+ }
1309
+
1310
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1311
+ restart = LABEL();
1312
+ #endif
1313
+
1314
+ load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
1315
+ OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);
1316
+
1317
+ if (compare_type != vector_compare_match2)
1318
+ {
1319
+ if (compare_type == vector_compare_match1i)
1320
+ fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1321
+
1322
+ /* VFEE */
1323
+ instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1324
+ instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
1325
+ instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
1326
+ sljit_emit_op_custom(compiler, instruction, 6);
1327
+ }
1328
+ else
1329
+ {
1330
+ for (i = 0; i < 3; i++)
1331
+ fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1332
+
1333
+ /* VFENE */
1334
+ instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1335
+ instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
1336
+ instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
1337
+ sljit_emit_op_custom(compiler, instruction, 6);
1338
+ }
1339
+
1340
+ /* VLGVB */
1341
+ instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
1342
+ instruction[1] = 7;
1343
+ instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
1344
+ sljit_emit_op_custom(compiler, instruction, 6);
1345
+
1346
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1347
+ quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
1348
+
1349
+ OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);
1350
+
1351
+ /* Second part (aligned) */
1352
+ start = LABEL();
1353
+
1354
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
1355
+
1356
+ partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1357
+ if (common->mode == PCRE2_JIT_COMPLETE)
1358
+ add_jump(compiler, &common->failed_match, partial_quit[1]);
1359
+
1360
+ load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
1361
+
1362
+ if (compare_type != vector_compare_match2)
1363
+ {
1364
+ if (compare_type == vector_compare_match1i)
1365
+ fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1366
+
1367
+ /* VFEE */
1368
+ instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1369
+ instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
1370
+ instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
1371
+ sljit_emit_op_custom(compiler, instruction, 6);
1372
+ }
1373
+ else
1374
+ {
1375
+ for (i = 0; i < 3; i++)
1376
+ fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1377
+
1378
+ /* VFENE */
1379
+ instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1380
+ instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
1381
+ instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
1382
+ sljit_emit_op_custom(compiler, instruction, 6);
1383
+ }
1384
+
1385
+ sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
1386
+ JUMPTO(SLJIT_OVERFLOW, start);
1387
+
1388
+ /* VLGVB */
1389
+ instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind);
1390
+ instruction[1] = 7;
1391
+ instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
1392
+ sljit_emit_op_custom(compiler, instruction, 6);
1393
+
1394
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1395
+
1396
+ JUMPHERE(quit);
1397
+
1398
+ if (common->mode != PCRE2_JIT_COMPLETE)
1399
+ {
1400
+ JUMPHERE(partial_quit[0]);
1401
+ JUMPHERE(partial_quit[1]);
1402
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
1403
+ SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
1404
+ }
1405
+ else
1406
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1407
+
1408
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1409
+ if (common->utf && offset > 0)
1410
+ {
1411
+ SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
1412
+
1413
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
1414
+
1415
+ quit = jump_if_utf_char_start(compiler, TMP1);
1416
+
1417
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1418
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1419
+
1420
+ OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16);
1421
+ JUMPTO(SLJIT_JUMP, restart);
1422
+
1423
+ JUMPHERE(quit);
1424
+ }
1425
+ #endif
1426
+ }
1427
+
1428
+ #define JIT_HAS_FAST_REQUESTED_CHAR_SIMD 1
1429
+
1430
+ static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
1431
+ {
1432
+ DEFINE_COMPILER;
1433
+ sljit_u16 instruction[3];
1434
+ struct sljit_label *start;
1435
+ struct sljit_jump *quit;
1436
+ jump_list *not_found = NULL;
1437
+ vector_compare_type compare_type = vector_compare_match1;
1438
+ sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
1439
+ sljit_s32 tmp3_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP3);
1440
+ sljit_s32 data_ind = 0;
1441
+ sljit_s32 tmp_ind = 1;
1442
+ sljit_s32 cmp1_ind = 2;
1443
+ sljit_s32 cmp2_ind = 3;
1444
+ sljit_s32 zero_ind = 4;
1445
+ sljit_u32 bit = 0;
1446
+ int i;
1447
+
1448
+ if (char1 != char2)
1449
+ {
1450
+ bit = char1 ^ char2;
1451
+ compare_type = vector_compare_match1i;
1452
+
1453
+ if (!is_powerof2(bit))
1454
+ {
1455
+ bit = 0;
1456
+ compare_type = vector_compare_match2;
1457
+ }
1458
+ }
1459
+
1460
+ add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
1461
+
1462
+ /* First part (unaligned start) */
1463
+
1464
+ OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, 16);
1465
+
1466
+ #if PCRE2_CODE_UNIT_WIDTH != 32
1467
+
1468
+ /* VREPI */
1469
+ instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4));
1470
+ instruction[1] = (sljit_u16)(char1 | bit);
1471
+ instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
1472
+ sljit_emit_op_custom(compiler, instruction, 6);
1473
+
1474
+ if (char1 != char2)
1475
+ {
1476
+ /* VREPI */
1477
+ instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
1478
+ instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
1479
+ /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
1480
+ sljit_emit_op_custom(compiler, instruction, 6);
1481
+ }
1482
+
1483
+ #else /* PCRE2_CODE_UNIT_WIDTH == 32 */
1484
+
1485
+ for (int i = 0; i < 2; i++)
1486
+ {
1487
+ replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP3);
1488
+
1489
+ if (char1 != char2)
1490
+ replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP3);
1491
+ }
1492
+
1493
+ #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
1494
+
1495
+ if (compare_type == vector_compare_match2)
1496
+ {
1497
+ /* VREPI */
1498
+ instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
1499
+ instruction[1] = 0;
1500
+ instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
1501
+ sljit_emit_op_custom(compiler, instruction, 6);
1502
+ }
1503
+
1504
+ load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);
1505
+ OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);
1506
+
1507
+ if (compare_type != vector_compare_match2)
1508
+ {
1509
+ if (compare_type == vector_compare_match1i)
1510
+ fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1511
+
1512
+ /* VFEE */
1513
+ instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1514
+ instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
1515
+ instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
1516
+ sljit_emit_op_custom(compiler, instruction, 6);
1517
+ }
1518
+ else
1519
+ {
1520
+ for (i = 0; i < 3; i++)
1521
+ fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1522
+
1523
+ /* VFENE */
1524
+ instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1525
+ instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
1526
+ instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
1527
+ sljit_emit_op_custom(compiler, instruction, 6);
1528
+ }
1529
+
1530
+ /* VLGVB */
1531
+ instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
1532
+ instruction[1] = 7;
1533
+ instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
1534
+ sljit_emit_op_custom(compiler, instruction, 6);
1535
+
1536
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
1537
+ quit = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);
1538
+
1539
+ OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 16);
1540
+
1541
+ /* Second part (aligned) */
1542
+ start = LABEL();
1543
+
1544
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 16);
1545
+
1546
+ add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
1547
+
1548
+ load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0);
1549
+
1550
+ if (compare_type != vector_compare_match2)
1551
+ {
1552
+ if (compare_type == vector_compare_match1i)
1553
+ fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1554
+
1555
+ /* VFEE */
1556
+ instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1557
+ instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4));
1558
+ instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80);
1559
+ sljit_emit_op_custom(compiler, instruction, 6);
1560
+ }
1561
+ else
1562
+ {
1563
+ for (i = 0; i < 3; i++)
1564
+ fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1565
+
1566
+ /* VFENE */
1567
+ instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind);
1568
+ instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
1569
+ instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
1570
+ sljit_emit_op_custom(compiler, instruction, 6);
1571
+ }
1572
+
1573
+ sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
1574
+ JUMPTO(SLJIT_OVERFLOW, start);
1575
+
1576
+ /* VLGVB */
1577
+ instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind);
1578
+ instruction[1] = 7;
1579
+ instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
1580
+ sljit_emit_op_custom(compiler, instruction, 6);
1581
+
1582
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
1583
+
1584
+ JUMPHERE(quit);
1585
+ add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
1586
+
1587
+ return not_found;
1588
+ }
1589
+
1590
+ #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
1591
+
1592
+ static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
1593
+ PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
1594
+ {
1595
+ DEFINE_COMPILER;
1596
+ sljit_u16 instruction[3];
1597
+ struct sljit_label *start;
1598
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1599
+ struct sljit_label *restart;
1600
+ #endif
1601
+ struct sljit_jump *quit;
1602
+ struct sljit_jump *jump[2];
1603
+ vector_compare_type compare1_type = vector_compare_match1;
1604
+ vector_compare_type compare2_type = vector_compare_match1;
1605
+ sljit_u32 bit1 = 0;
1606
+ sljit_u32 bit2 = 0;
1607
+ sljit_s32 diff = IN_UCHARS(offs2 - offs1);
1608
+ sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
1609
+ sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2);
1610
+ sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
1611
+ sljit_s32 data1_ind = 0;
1612
+ sljit_s32 data2_ind = 1;
1613
+ sljit_s32 tmp1_ind = 2;
1614
+ sljit_s32 tmp2_ind = 3;
1615
+ sljit_s32 cmp1a_ind = 4;
1616
+ sljit_s32 cmp1b_ind = 5;
1617
+ sljit_s32 cmp2a_ind = 6;
1618
+ sljit_s32 cmp2b_ind = 7;
1619
+ sljit_s32 zero_ind = 8;
1620
+ int i;
1621
+
1622
+ SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
1623
+ SLJIT_ASSERT(-diff <= (sljit_s32)IN_UCHARS(max_fast_forward_char_pair_offset()));
1624
+ SLJIT_ASSERT(tmp1_reg_ind != 0 && tmp2_reg_ind != 0);
1625
+
1626
+ if (char1a != char1b)
1627
+ {
1628
+ bit1 = char1a ^ char1b;
1629
+ compare1_type = vector_compare_match1i;
1630
+
1631
+ if (!is_powerof2(bit1))
1632
+ {
1633
+ bit1 = 0;
1634
+ compare1_type = vector_compare_match2;
1635
+ }
1636
+ }
1637
+
1638
+ if (char2a != char2b)
1639
+ {
1640
+ bit2 = char2a ^ char2b;
1641
+ compare2_type = vector_compare_match1i;
1642
+
1643
+ if (!is_powerof2(bit2))
1644
+ {
1645
+ bit2 = 0;
1646
+ compare2_type = vector_compare_match2;
1647
+ }
1648
+ }
1649
+
1650
+ /* Initialize. */
1651
+ if (common->match_end_ptr != 0)
1652
+ {
1653
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
1654
+ OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
1655
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
1656
+
1657
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
1658
+ SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
1659
+ }
1660
+
1661
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
1662
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1663
+ OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
1664
+
1665
+ #if PCRE2_CODE_UNIT_WIDTH != 32
1666
+
1667
+ OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
1668
+
1669
+ /* VREPI */
1670
+ instruction[0] = (sljit_u16)(0xe700 | (cmp1a_ind << 4));
1671
+ instruction[1] = (sljit_u16)(char1a | bit1);
1672
+ instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
1673
+ sljit_emit_op_custom(compiler, instruction, 6);
1674
+
1675
+ if (char1a != char1b)
1676
+ {
1677
+ /* VREPI */
1678
+ instruction[0] = (sljit_u16)(0xe700 | (cmp1b_ind << 4));
1679
+ instruction[1] = (sljit_u16)(bit1 != 0 ? bit1 : char1b);
1680
+ /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
1681
+ sljit_emit_op_custom(compiler, instruction, 6);
1682
+ }
1683
+
1684
+ /* VREPI */
1685
+ instruction[0] = (sljit_u16)(0xe700 | (cmp2a_ind << 4));
1686
+ instruction[1] = (sljit_u16)(char2a | bit2);
1687
+ /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
1688
+ sljit_emit_op_custom(compiler, instruction, 6);
1689
+
1690
+ if (char2a != char2b)
1691
+ {
1692
+ /* VREPI */
1693
+ instruction[0] = (sljit_u16)(0xe700 | (cmp2b_ind << 4));
1694
+ instruction[1] = (sljit_u16)(bit2 != 0 ? bit2 : char2b);
1695
+ /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
1696
+ sljit_emit_op_custom(compiler, instruction, 6);
1697
+ }
1698
+
1699
+ #else /* PCRE2_CODE_UNIT_WIDTH == 32 */
1700
+
1701
+ for (int i = 0; i < 2; i++)
1702
+ {
1703
+ replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1);
1704
+
1705
+ if (char1a != char1b)
1706
+ replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? bit1 : char1b, TMP1);
1707
+
1708
+ replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1);
1709
+
1710
+ if (char2a != char2b)
1711
+ replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1);
1712
+ }
1713
+
1714
+ OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
1715
+
1716
+ #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
1717
+
1718
+ /* VREPI */
1719
+ instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
1720
+ instruction[1] = 0;
1721
+ instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
1722
+ sljit_emit_op_custom(compiler, instruction, 6);
1723
+
1724
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1725
+ restart = LABEL();
1726
+ #endif
1727
+
1728
+ jump[0] = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);
1729
+ load_from_mem_vector(compiler, TRUE, data2_ind, tmp1_reg_ind, 0);
1730
+ jump[1] = JUMP(SLJIT_JUMP);
1731
+ JUMPHERE(jump[0]);
1732
+ load_from_mem_vector(compiler, FALSE, data2_ind, tmp1_reg_ind, 0);
1733
+ JUMPHERE(jump[1]);
1734
+
1735
+ load_from_mem_vector(compiler, TRUE, data1_ind, str_ptr_reg_ind, 0);
1736
+ OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
1737
+
1738
+ for (i = 0; i < 3; i++)
1739
+ {
1740
+ fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
1741
+ fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
1742
+ }
1743
+
1744
+ /* VN */
1745
+ instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
1746
+ instruction[1] = (sljit_u16)(data2_ind << 12);
1747
+ instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
1748
+ sljit_emit_op_custom(compiler, instruction, 6);
1749
+
1750
+ /* VFENE */
1751
+ instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
1752
+ instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
1753
+ instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
1754
+ sljit_emit_op_custom(compiler, instruction, 6);
1755
+
1756
+ /* VLGVB */
1757
+ instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind);
1758
+ instruction[1] = 7;
1759
+ instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
1760
+ sljit_emit_op_custom(compiler, instruction, 6);
1761
+
1762
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1763
+ quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
1764
+
1765
+ OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);
1766
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, diff);
1767
+
1768
+ /* Main loop. */
1769
+ start = LABEL();
1770
+
1771
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
1772
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1773
+
1774
+ load_from_mem_vector(compiler, FALSE, data1_ind, str_ptr_reg_ind, 0);
1775
+ load_from_mem_vector(compiler, FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind);
1776
+
1777
+ for (i = 0; i < 3; i++)
1778
+ {
1779
+ fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
1780
+ fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
1781
+ }
1782
+
1783
+ /* VN */
1784
+ instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
1785
+ instruction[1] = (sljit_u16)(data2_ind << 12);
1786
+ instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
1787
+ sljit_emit_op_custom(compiler, instruction, 6);
1788
+
1789
+ /* VFENE */
1790
+ instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
1791
+ instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
1792
+ instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
1793
+ sljit_emit_op_custom(compiler, instruction, 6);
1794
+
1795
+ sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW);
1796
+ JUMPTO(SLJIT_OVERFLOW, start);
1797
+
1798
+ /* VLGVB */
1799
+ instruction[0] = (sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind);
1800
+ instruction[1] = 7;
1801
+ instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
1802
+ sljit_emit_op_custom(compiler, instruction, 6);
1803
+
1804
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1805
+
1806
+ JUMPHERE(quit);
1807
+
1808
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1809
+
1810
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1811
+ if (common->utf)
1812
+ {
1813
+ SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
1814
+
1815
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
1816
+
1817
+ quit = jump_if_utf_char_start(compiler, TMP1);
1818
+
1819
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1820
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1821
+
1822
+ /* TMP1 contains diff. */
1823
+ OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
1824
+ OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
1825
+ JUMPTO(SLJIT_JUMP, restart);
1826
+
1827
+ JUMPHERE(quit);
1828
+ }
1829
+ #endif
1830
+
1831
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
1832
+
1833
+ if (common->match_end_ptr != 0)
1834
+ OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
1835
+ }
1836
+
1837
+ #endif /* SLJIT_CONFIG_S390X */
1838
+
1839
+ #if (defined SLJIT_CONFIG_LOONGARCH_64 && SLJIT_CONFIG_LOONGARCH_64)
1840
+
1841
+ #ifdef __linux__
1842
+ /* Using getauxval(AT_HWCAP) under Linux for detecting whether LSX is available */
1843
+ #include <sys/auxv.h>
1844
+ #define LOONGARCH_HWCAP_LSX (1 << 4)
1845
+ #define HAS_LSX_SUPPORT ((getauxval(AT_HWCAP) & LOONGARCH_HWCAP_LSX) != 0)
1846
+ #else
1847
+ #define HAS_LSX_SUPPORT 0
1848
+ #endif
1849
+
1850
+ typedef sljit_ins sljit_u32;
1851
+
1852
+ #define SI12_IMM_MASK 0x003ffc00
1853
+ #define UI5_IMM_MASK 0x00007c00
1854
+ #define UI2_IMM_MASK 0x00000c00
1855
+
1856
+ #define VD(vd) ((sljit_ins)vd << 0)
1857
+ #define VJ(vj) ((sljit_ins)vj << 5)
1858
+ #define VK(vk) ((sljit_ins)vk << 10)
1859
+ #define RD_V(rd) ((sljit_ins)rd << 0)
1860
+ #define RJ_V(rj) ((sljit_ins)rj << 5)
1861
+
1862
+ #define IMM_SI12(imm) (((sljit_ins)(imm) << 10) & SI12_IMM_MASK)
1863
+ #define IMM_UI5(imm) (((sljit_ins)(imm) << 10) & UI5_IMM_MASK)
1864
+ #define IMM_UI2(imm) (((sljit_ins)(imm) << 10) & UI2_IMM_MASK)
1865
+
1866
+ // LSX OPCODES:
1867
+ #define VLD 0x2c000000
1868
+ #define VOR_V 0x71268000
1869
+ #define VAND_V 0x71260000
1870
+ #define VBSLL_V 0x728e0000
1871
+ #define VMSKLTZ_B 0x729c4000
1872
+ #define VPICKVE2GR_WU 0x72f3e000
1873
+
1874
+ #if PCRE2_CODE_UNIT_WIDTH == 8
1875
+ #define VREPLGR2VR 0x729f0000
1876
+ #define VSEQ 0x70000000
1877
+ #elif PCRE2_CODE_UNIT_WIDTH == 16
1878
+ #define VREPLGR2VR 0x729f0400
1879
+ #define VSEQ 0x70008000
1880
+ #else
1881
+ #define VREPLGR2VR 0x729f0800
1882
+ #define VSEQ 0x70010000
1883
+ #endif
1884
+
1885
+ static void fast_forward_char_pair_lsx_compare(struct sljit_compiler *compiler, vector_compare_type compare_type,
1886
+ sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
1887
+ {
1888
+ if (compare_type != vector_compare_match2)
1889
+ {
1890
+ if (compare_type == vector_compare_match1i)
1891
+ {
1892
+ /* VOR.V vd, vj, vk */
1893
+ push_inst(compiler, VOR_V | VD(dst_ind) | VJ(cmp2_ind) | VK(dst_ind));
1894
+ }
1895
+
1896
+ /* VSEQ.B/H/W vd, vj, vk */
1897
+ push_inst(compiler, VSEQ | VD(dst_ind) | VJ(dst_ind) | VK(cmp1_ind));
1898
+ return;
1899
+ }
1900
+
1901
+ /* VBSLL.V vd, vj, ui5 */
1902
+ push_inst(compiler, VBSLL_V | VD(tmp_ind) | VJ(dst_ind) | IMM_UI5(0));
1903
+
1904
+ /* VSEQ.B/H/W vd, vj, vk */
1905
+ push_inst(compiler, VSEQ | VD(dst_ind) | VJ(dst_ind) | VK(cmp1_ind));
1906
+
1907
+ /* VSEQ.B/H/W vd, vj, vk */
1908
+ push_inst(compiler, VSEQ | VD(tmp_ind) | VJ(tmp_ind) | VK(cmp2_ind));
1909
+
1910
+ /* VOR vd, vj, vk */
1911
+ push_inst(compiler, VOR_V | VD(dst_ind) | VJ(tmp_ind) | VK(dst_ind));
1912
+ return;
1913
+ }
1914
+
1915
+ #define JIT_HAS_FAST_FORWARD_CHAR_SIMD HAS_LSX_SUPPORT
1916
+
1917
+ static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
1918
+ {
1919
+ DEFINE_COMPILER;
1920
+ struct sljit_label *start;
1921
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1922
+ struct sljit_label *restart;
1923
+ #endif
1924
+ struct sljit_jump *quit;
1925
+ struct sljit_jump *partial_quit[2];
1926
+ vector_compare_type compare_type = vector_compare_match1;
1927
+ sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
1928
+ sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
1929
+ sljit_s32 data_ind = 0;
1930
+ sljit_s32 tmp_ind = 1;
1931
+ sljit_s32 cmp1_ind = 2;
1932
+ sljit_s32 cmp2_ind = 3;
1933
+ sljit_u32 bit = 0;
1934
+
1935
+ SLJIT_UNUSED_ARG(offset);
1936
+
1937
+ if (char1 != char2)
1938
+ {
1939
+ bit = char1 ^ char2;
1940
+ compare_type = vector_compare_match1i;
1941
+
1942
+ if (!is_powerof2(bit))
1943
+ {
1944
+ bit = 0;
1945
+ compare_type = vector_compare_match2;
1946
+ }
1947
+ }
1948
+
1949
+ partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1950
+ if (common->mode == PCRE2_JIT_COMPLETE)
1951
+ add_jump(compiler, &common->failed_match, partial_quit[0]);
1952
+
1953
+ /* First part (unaligned start) */
1954
+
1955
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1 | bit);
1956
+
1957
+ /* VREPLGR2VR.B/H/W vd, rj */
1958
+ push_inst(compiler, VREPLGR2VR | VD(cmp1_ind) | RJ_V(tmp1_reg_ind));
1959
+
1960
+ if (char1 != char2)
1961
+ {
1962
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, bit != 0 ? bit : char2);
1963
+
1964
+ /* VREPLGR2VR.B/H/W vd, rj */
1965
+ push_inst(compiler, VREPLGR2VR | VD(cmp2_ind) | RJ_V(tmp1_reg_ind));
1966
+ }
1967
+
1968
+ OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
1969
+
1970
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1971
+ restart = LABEL();
1972
+ #endif
1973
+
1974
+ OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
1975
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1976
+
1977
+ /* VLD vd, rj, si12 */
1978
+ push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
1979
+ fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
1980
+
1981
+ /* VMSKLTZ.B vd, vj */
1982
+ push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
1983
+
1984
+ /* VPICKVE2GR.WU rd, vj, ui2 */
1985
+ push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
1986
+
1987
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1988
+ OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
1989
+
1990
+ quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
1991
+
1992
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1993
+
1994
+ /* Second part (aligned) */
1995
+ start = LABEL();
1996
+
1997
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
1998
+
1999
+ partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2000
+ if (common->mode == PCRE2_JIT_COMPLETE)
2001
+ add_jump(compiler, &common->failed_match, partial_quit[1]);
2002
+
2003
+ /* VLD vd, rj, si12 */
2004
+ push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
2005
+ fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
2006
+
2007
+ /* VMSKLTZ.B vd, vj */
2008
+ push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
2009
+
2010
+ /* VPICKVE2GR.WU rd, vj, ui2 */
2011
+ push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
2012
+
2013
+ CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
2014
+
2015
+ JUMPHERE(quit);
2016
+
2017
+ /* CTZ.W rd, rj */
2018
+ push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));
2019
+
2020
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2021
+
2022
+ if (common->mode != PCRE2_JIT_COMPLETE)
2023
+ {
2024
+ JUMPHERE(partial_quit[0]);
2025
+ JUMPHERE(partial_quit[1]);
2026
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
2027
+ SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
2028
+ }
2029
+ else
2030
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2031
+
2032
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
2033
+ if (common->utf && offset > 0)
2034
+ {
2035
+ SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
2036
+
2037
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
2038
+
2039
+ quit = jump_if_utf_char_start(compiler, TMP1);
2040
+
2041
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2042
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2043
+ OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
2044
+ JUMPTO(SLJIT_JUMP, restart);
2045
+
2046
+ JUMPHERE(quit);
2047
+ }
2048
+ #endif
2049
+ }
2050
+
2051
+ #define JIT_HAS_FAST_REQUESTED_CHAR_SIMD HAS_LSX_SUPPORT
2052
+ /* Emits LSX code that scans forward, 16 bytes at a time, for char1 or char2 starting at the address held in TMP1 and bounded by STR_END. Returns the list of jumps taken when nothing is found; on the fall-through (found) path TMP1 holds the chunk base plus the bit index of the first hit and STR_PTR is restored from TMP3. */
2053
+ static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
2054
+ {
2055
+ DEFINE_COMPILER;
2056
+ struct sljit_label *start;
2057
+ struct sljit_jump *quit;
2058
+ jump_list *not_found = NULL;
2059
+ vector_compare_type compare_type = vector_compare_match1;
2060
+ sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
2061
+ sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
2062
+ sljit_s32 data_ind = 0;
2063
+ sljit_s32 tmp_ind = 1;
2064
+ sljit_s32 cmp1_ind = 2;
2065
+ sljit_s32 cmp2_ind = 3;
2066
+ sljit_u32 bit = 0;
2067
+ /* Pick the compare strategy: one char (match1), two chars differing in a single bit (match1i), or two unrelated chars (match2). */
2068
+ if (char1 != char2)
2069
+ {
2070
+ bit = char1 ^ char2;
2071
+ compare_type = vector_compare_match1i;
2072
+
2073
+ if (!is_powerof2(bit))
2074
+ {
2075
+ bit = 0;
2076
+ compare_type = vector_compare_match2;
2077
+ }
2078
+ }
2079
+ /* Nothing to scan if the start address in TMP1 is already at or past STR_END. TMP2 keeps the start address; TMP3 saves STR_PTR, which is used as the scan pointer below. */
2080
+ add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
2081
+ OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
2082
+ OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2083
+
2084
+ /* First part (unaligned start) */
2085
+ /* Broadcast the first comparison value (char1 with the differing bit set, if any) into vector cmp1_ind. */
2086
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1 | bit);
2087
+
2088
+ /* VREPLGR2VR vd, rj */
2089
+ push_inst(compiler, VREPLGR2VR | VD(cmp1_ind) | RJ_V(tmp1_reg_ind));
2090
+
2091
+ if (char1 != char2)
2092
+ {
2093
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, bit != 0 ? bit : char2); /* second operand: the differing bit for match1i, otherwise char2 */
2094
+ /* VREPLGR2VR vd, rj */
2095
+ push_inst(compiler, VREPLGR2VR | VD(cmp2_ind) | RJ_V(tmp1_reg_ind));
2096
+ }
2097
+ /* Round the scan pointer down to a 16-byte boundary; TMP2 becomes the offset of the real start inside that chunk. */
2098
+ OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
2099
+ OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
2100
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2101
+
2102
+ /* VLD vd, rj, si12 */
2103
+ push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
2104
+ fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
2105
+
2106
+ /* VMSKLTZ.B vd, vj */
2107
+ push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
2108
+
2109
+ /* VPICKVE2GR.WU rd, vj, ui2 */
2110
+ push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
2111
+ /* Move STR_PTR back to the real start and shift the result mask right by the same offset, discarding hits before the start. */
2112
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2113
+ OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
2114
+
2115
+ quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
2116
+ /* No hit in the first chunk: return to the aligned base so the loop below steps in whole 16-byte chunks. */
2117
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2118
+
2119
+ /* Second part (aligned) */
2120
+ start = LABEL();
2121
+
2122
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
2123
+
2124
+ add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2125
+
2126
+ /* VLD vd, rj, si12 */
2127
+ push_inst(compiler, VLD | VD(data_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
2128
+ fast_forward_char_pair_lsx_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
2129
+
2130
+ /* VMSKLTZ.B vd, vj */
2131
+ push_inst(compiler, VMSKLTZ_B | VD(tmp_ind) | VJ(data_ind));
2132
+
2133
+ /* VPICKVE2GR.WU rd, vj, ui2 */
2134
+ push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp_ind) | IMM_UI2(0));
2135
+ /* Keep looping while the result mask in TMP1 is zero. */
2136
+ CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
2137
+
2138
+ JUMPHERE(quit);
2139
+
2140
+ /* CTZ.W rd, rj */
2141
+ push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));
2142
+ /* TMP1 = STR_PTR + index of the lowest set mask bit; a hit at or beyond STR_END counts as not found. */
2143
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
2144
+ add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
2145
+ /* Restore the caller's STR_PTR (only on this fall-through path; the not_found jumps above leave before this point). */
2146
+ OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2147
+ return not_found;
2148
+ }
2149
+
2150
+ #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD HAS_LSX_SUPPORT
2151
+ /* Emits LSX code that advances STR_PTR to the next position where the character at offset offs1 is char1a/char1b AND the character at offset offs2 is char2a/char2b (offs1 > offs2). Jumps to common->failed_match when STR_END is reached first. Uses TMP1/TMP2 as scratch and TMP3 to save STR_END when common->match_end_ptr is set. */
2152
+ static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
2153
+ PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
2154
+ {
2155
+ DEFINE_COMPILER;
2156
+ vector_compare_type compare1_type = vector_compare_match1;
2157
+ vector_compare_type compare2_type = vector_compare_match1;
2158
+ sljit_u32 bit1 = 0;
2159
+ sljit_u32 bit2 = 0;
2160
+ sljit_u32 diff = IN_UCHARS(offs1 - offs2); /* distance between the two probed positions */
2161
+ sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
2162
+ sljit_s32 tmp2_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP2);
2163
+ sljit_s32 str_ptr_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, STR_PTR);
2164
+ sljit_s32 data1_ind = 0;
2165
+ sljit_s32 data2_ind = 1;
2166
+ sljit_s32 tmp1_ind = 2;
2167
+ sljit_s32 tmp2_ind = 3;
2168
+ sljit_s32 cmp1a_ind = 4;
2169
+ sljit_s32 cmp1b_ind = 5;
2170
+ sljit_s32 cmp2a_ind = 6;
2171
+ sljit_s32 cmp2b_ind = 7;
2172
+ struct sljit_label *start;
2173
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
2174
+ struct sljit_label *restart;
2175
+ #endif
2176
+ struct sljit_jump *jump[2];
2177
+
2178
+ SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
2179
+ SLJIT_ASSERT(diff <= (unsigned)IN_UCHARS(max_fast_forward_char_pair_offset()));
2180
+
2181
+ /* Initialize. */
2182
+ if (common->match_end_ptr != 0)
2183
+ {
2184
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
2185
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
2186
+ OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2187
+ /* Temporarily clamp STR_END to the smaller of the stored limit (+ offs1 + 1) and the real end; the original is kept in TMP3. */
2188
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
2189
+ SELECT(SLJIT_LESS, STR_END, TMP1, 0, STR_END);
2190
+ }
2191
+ /* From here on STR_PTR addresses the offs1 character of the candidate; it is moved back by offs1 before returning. */
2192
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
2193
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2194
+ /* Load the comparison operands for the first character into TMP1 (and TMP2 when two alternatives exist). */
2195
+ if (char1a == char1b)
2196
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a);
2197
+ else
2198
+ {
2199
+ bit1 = char1a ^ char1b;
2200
+ if (is_powerof2(bit1))
2201
+ {
2202
+ compare1_type = vector_compare_match1i;
2203
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a | bit1);
2204
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, bit1);
2205
+ }
2206
+ else
2207
+ {
2208
+ compare1_type = vector_compare_match2;
2209
+ bit1 = 0;
2210
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char1a);
2211
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, char1b);
2212
+ }
2213
+ }
2214
+
2215
+ /* VREPLGR2VR vd, rj */
2216
+ push_inst(compiler, VREPLGR2VR | VD(cmp1a_ind) | RJ_V(tmp1_reg_ind));
2217
+
2218
+ if (char1a != char1b)
2219
+ {
2220
+ /* VREPLGR2VR vd, rj */
2221
+ push_inst(compiler, VREPLGR2VR | VD(cmp1b_ind) | RJ_V(tmp2_reg_ind));
2222
+ }
2223
+ /* Same operand setup for the second character. */
2224
+ if (char2a == char2b)
2225
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a);
2226
+ else
2227
+ {
2228
+ bit2 = char2a ^ char2b;
2229
+ if (is_powerof2(bit2))
2230
+ {
2231
+ compare2_type = vector_compare_match1i;
2232
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a | bit2);
2233
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, bit2);
2234
+ }
2235
+ else
2236
+ {
2237
+ compare2_type = vector_compare_match2;
2238
+ bit2 = 0;
2239
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, char2a);
2240
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, char2b);
2241
+ }
2242
+ }
2243
+
2244
+ /* VREPLGR2VR vd, rj */
2245
+ push_inst(compiler, VREPLGR2VR | VD(cmp2a_ind) | RJ_V(tmp1_reg_ind));
2246
+
2247
+ if (char2a != char2b)
2248
+ {
2249
+ /* VREPLGR2VR vd, rj */
2250
+ push_inst(compiler, VREPLGR2VR | VD(cmp2b_ind) | RJ_V(tmp2_reg_ind));
2251
+ }
2252
+
2253
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
2254
+ restart = LABEL();
2255
+ #endif
2256
+ /* TMP1 = address of the second character; TMP2 = offset of STR_PTR inside its 16-byte chunk; STR_PTR is rounded down to the chunk base. */
2257
+ OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
2258
+ OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
2259
+ OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
2260
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2261
+
2262
+ /* VLD vd, rj, si12 */
2263
+ push_inst(compiler, VLD | VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
2264
+ /* If the second character lies inside the chunk just loaded, derive data2 by byte-shifting data1 instead of issuing a second load below the chunk base. */
2265
+ jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);
2266
+
2267
+ /* VLD vd, rj, si12 */
2268
+ push_inst(compiler, VLD | VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(sljit_s8)diff));
2269
+ jump[1] = JUMP(SLJIT_JUMP);
2270
+
2271
+ JUMPHERE(jump[0]);
2272
+
2273
+ /* VBSLL.V vd, vj, ui5 */
2274
+ push_inst(compiler, VBSLL_V | VD(data2_ind) | VJ(data1_ind) | IMM_UI5(diff));
2275
+
2276
+ JUMPHERE(jump[1]);
2277
+
2278
+ fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
2279
+ fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
2280
+ /* A candidate requires BOTH characters to match, so the two results are AND-ed exactly as in the main loop (this previously emitted VOR.V, which accepted positions where only one character matched). */
2281
+ /* VAND.V vd, vj, vk */
2282
+ push_inst(compiler, VAND_V | VD(data1_ind) | VJ(data1_ind) | VK(data2_ind));
2283
+
2284
+ /* VMSKLTZ.B vd, vj */
2285
+ push_inst(compiler, VMSKLTZ_B | VD(tmp1_ind) | VJ(data1_ind));
2286
+
2287
+ /* VPICKVE2GR.WU rd, vj, ui2 */
2288
+ push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0));
2289
+
2290
+ /* Ignore matches before the first STR_PTR. */
2291
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2292
+ OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
2293
+
2294
+ jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
2295
+ /* No hit in the first chunk: go back to the aligned base so the loop advances in whole 16-byte steps. */
2296
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2297
+
2298
+ /* Main loop. */
2299
+ start = LABEL();
2300
+
2301
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
2302
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2303
+
2304
+ /* VLD vd, rj, si12 */
2305
+ push_inst(compiler, VLD | VD(data1_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(0));
2306
+ push_inst(compiler, VLD | VD(data2_ind) | RJ_V(str_ptr_reg_ind) | IMM_SI12(-(sljit_s8)diff));
2307
+
2308
+ fast_forward_char_pair_lsx_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
2309
+ fast_forward_char_pair_lsx_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
2310
+
2311
+ /* VAND.V vd, vj, vk */
2312
+ push_inst(compiler, VAND_V | VD(data1_ind) | VJ(data1_ind) | VK(data2_ind));
2313
+
2314
+ /* VMSKLTZ.B vd, vj */
2315
+ push_inst(compiler, VMSKLTZ_B | VD(tmp1_ind) | VJ(data1_ind));
2316
+
2317
+ /* VPICKVE2GR.WU rd, vj, ui2 */
2318
+ push_inst(compiler, VPICKVE2GR_WU | RD_V(tmp1_reg_ind) | VJ(tmp1_ind) | IMM_UI2(0));
2319
+ /* Keep looping while the combined result mask in TMP1 is zero. */
2320
+ CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
2321
+
2322
+ JUMPHERE(jump[0]);
2323
+
2324
+ /* CTZ.W rd, rj */
2325
+ push_inst(compiler, CTZ_W | RD_V(tmp1_reg_ind) | RJ_V(tmp1_reg_ind));
2326
+ /* Advance STR_PTR by the index of the lowest set mask bit. */
2327
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2328
+ /* A hit at or beyond STR_END does not count. */
2329
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2330
+
2331
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
2332
+ if (common->utf)
2333
+ {
2334
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1)); /* code unit at the candidate's start position */
2335
+
2336
+ jump[0] = jump_if_utf_char_start(compiler, TMP1);
2337
+ /* Candidate start is not a character start (per the helper's name): step one code unit forward and rescan from restart, or fail at STR_END. */
2338
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2339
+ CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
2340
+
2341
+ add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));
2342
+
2343
+ JUMPHERE(jump[0]);
2344
+ }
2345
+ #endif
2346
+ /* Undo the initial offs1 bias so STR_PTR addresses the candidate's start. */
2347
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
2348
+ /* Restore the STR_END saved during initialization. */
2349
+ if (common->match_end_ptr != 0)
2350
+ OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2351
+ }
2352
+
2353
+ #endif /* SLJIT_CONFIG_LOONGARCH_64 */
2354
+
2355
+ #endif /* !SUPPORT_VALGRIND */