warp-lang 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (315)
  1. warp/__init__.py +15 -7
  2. warp/__init__.pyi +1 -0
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +22 -443
  6. warp/build_dll.py +384 -0
  7. warp/builtins.py +998 -488
  8. warp/codegen.py +1307 -739
  9. warp/config.py +5 -3
  10. warp/constants.py +6 -0
  11. warp/context.py +1291 -548
  12. warp/dlpack.py +31 -31
  13. warp/fabric.py +326 -0
  14. warp/fem/__init__.py +27 -0
  15. warp/fem/cache.py +389 -0
  16. warp/fem/dirichlet.py +181 -0
  17. warp/fem/domain.py +263 -0
  18. warp/fem/field/__init__.py +101 -0
  19. warp/fem/field/field.py +149 -0
  20. warp/fem/field/nodal_field.py +299 -0
  21. warp/fem/field/restriction.py +21 -0
  22. warp/fem/field/test.py +181 -0
  23. warp/fem/field/trial.py +183 -0
  24. warp/fem/geometry/__init__.py +19 -0
  25. warp/fem/geometry/closest_point.py +70 -0
  26. warp/fem/geometry/deformed_geometry.py +271 -0
  27. warp/fem/geometry/element.py +744 -0
  28. warp/fem/geometry/geometry.py +186 -0
  29. warp/fem/geometry/grid_2d.py +373 -0
  30. warp/fem/geometry/grid_3d.py +435 -0
  31. warp/fem/geometry/hexmesh.py +953 -0
  32. warp/fem/geometry/partition.py +376 -0
  33. warp/fem/geometry/quadmesh_2d.py +532 -0
  34. warp/fem/geometry/tetmesh.py +840 -0
  35. warp/fem/geometry/trimesh_2d.py +577 -0
  36. warp/fem/integrate.py +1616 -0
  37. warp/fem/operator.py +191 -0
  38. warp/fem/polynomial.py +213 -0
  39. warp/fem/quadrature/__init__.py +2 -0
  40. warp/fem/quadrature/pic_quadrature.py +245 -0
  41. warp/fem/quadrature/quadrature.py +294 -0
  42. warp/fem/space/__init__.py +292 -0
  43. warp/fem/space/basis_space.py +489 -0
  44. warp/fem/space/collocated_function_space.py +105 -0
  45. warp/fem/space/dof_mapper.py +236 -0
  46. warp/fem/space/function_space.py +145 -0
  47. warp/fem/space/grid_2d_function_space.py +267 -0
  48. warp/fem/space/grid_3d_function_space.py +306 -0
  49. warp/fem/space/hexmesh_function_space.py +352 -0
  50. warp/fem/space/partition.py +350 -0
  51. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  52. warp/fem/space/restriction.py +160 -0
  53. warp/fem/space/shape/__init__.py +15 -0
  54. warp/fem/space/shape/cube_shape_function.py +738 -0
  55. warp/fem/space/shape/shape_function.py +103 -0
  56. warp/fem/space/shape/square_shape_function.py +611 -0
  57. warp/fem/space/shape/tet_shape_function.py +567 -0
  58. warp/fem/space/shape/triangle_shape_function.py +429 -0
  59. warp/fem/space/tetmesh_function_space.py +292 -0
  60. warp/fem/space/topology.py +295 -0
  61. warp/fem/space/trimesh_2d_function_space.py +221 -0
  62. warp/fem/types.py +77 -0
  63. warp/fem/utils.py +495 -0
  64. warp/native/array.h +164 -55
  65. warp/native/builtin.h +150 -174
  66. warp/native/bvh.cpp +75 -328
  67. warp/native/bvh.cu +406 -23
  68. warp/native/bvh.h +37 -45
  69. warp/native/clang/clang.cpp +136 -24
  70. warp/native/crt.cpp +1 -76
  71. warp/native/crt.h +111 -104
  72. warp/native/cuda_crt.h +1049 -0
  73. warp/native/cuda_util.cpp +15 -3
  74. warp/native/cuda_util.h +3 -1
  75. warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
  76. warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
  77. warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
  78. warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
  79. warp/native/cutlass/tools/library/scripts/library.py +799 -0
  80. warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
  81. warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
  82. warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
  83. warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
  84. warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
  85. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
  86. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
  87. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
  88. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
  89. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
  90. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
  91. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
  92. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
  93. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
  94. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
  95. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
  96. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
  97. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
  98. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
  99. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
  100. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
  101. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
  102. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
  103. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
  104. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
  105. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
  106. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
  107. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
  108. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
  109. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
  110. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
  111. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
  112. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
  113. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
  114. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
  115. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
  116. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
  117. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  118. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  119. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
  120. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
  121. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
  122. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  123. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  124. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
  125. warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
  126. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
  127. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
  128. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
  129. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
  130. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
  131. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
  132. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
  133. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
  134. warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
  135. warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
  136. warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
  137. warp/native/cutlass/tools/library/scripts/rt.py +796 -0
  138. warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
  139. warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
  140. warp/native/cutlass_gemm.cu +5 -3
  141. warp/native/exports.h +1240 -949
  142. warp/native/fabric.h +228 -0
  143. warp/native/hashgrid.cpp +4 -4
  144. warp/native/hashgrid.h +22 -2
  145. warp/native/initializer_array.h +2 -2
  146. warp/native/intersect.h +22 -7
  147. warp/native/intersect_adj.h +8 -8
  148. warp/native/intersect_tri.h +13 -16
  149. warp/native/marching.cu +157 -161
  150. warp/native/mat.h +119 -19
  151. warp/native/matnn.h +2 -2
  152. warp/native/mesh.cpp +108 -83
  153. warp/native/mesh.cu +243 -6
  154. warp/native/mesh.h +1547 -458
  155. warp/native/nanovdb/NanoVDB.h +1 -1
  156. warp/native/noise.h +272 -329
  157. warp/native/quat.h +51 -8
  158. warp/native/rand.h +45 -35
  159. warp/native/range.h +6 -2
  160. warp/native/reduce.cpp +157 -0
  161. warp/native/reduce.cu +348 -0
  162. warp/native/runlength_encode.cpp +62 -0
  163. warp/native/runlength_encode.cu +46 -0
  164. warp/native/scan.cu +11 -13
  165. warp/native/scan.h +1 -0
  166. warp/native/solid_angle.h +442 -0
  167. warp/native/sort.cpp +13 -0
  168. warp/native/sort.cu +9 -1
  169. warp/native/sparse.cpp +338 -0
  170. warp/native/sparse.cu +545 -0
  171. warp/native/spatial.h +2 -2
  172. warp/native/temp_buffer.h +30 -0
  173. warp/native/vec.h +126 -24
  174. warp/native/volume.h +120 -0
  175. warp/native/warp.cpp +658 -53
  176. warp/native/warp.cu +660 -68
  177. warp/native/warp.h +112 -12
  178. warp/optim/__init__.py +1 -0
  179. warp/optim/linear.py +922 -0
  180. warp/optim/sgd.py +92 -0
  181. warp/render/render_opengl.py +392 -152
  182. warp/render/render_usd.py +11 -11
  183. warp/sim/__init__.py +2 -2
  184. warp/sim/articulation.py +385 -185
  185. warp/sim/collide.py +21 -8
  186. warp/sim/import_mjcf.py +297 -106
  187. warp/sim/import_urdf.py +389 -210
  188. warp/sim/import_usd.py +198 -97
  189. warp/sim/inertia.py +17 -18
  190. warp/sim/integrator_euler.py +14 -8
  191. warp/sim/integrator_xpbd.py +161 -19
  192. warp/sim/model.py +795 -291
  193. warp/sim/optimizer.py +2 -6
  194. warp/sim/render.py +65 -3
  195. warp/sim/utils.py +3 -0
  196. warp/sparse.py +1227 -0
  197. warp/stubs.py +665 -223
  198. warp/tape.py +66 -15
  199. warp/tests/__main__.py +3 -6
  200. warp/tests/assets/curlnoise_golden.npy +0 -0
  201. warp/tests/assets/pnoise_golden.npy +0 -0
  202. warp/tests/assets/torus.usda +105 -105
  203. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  204. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  205. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  206. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  207. warp/tests/aux_test_unresolved_func.py +14 -0
  208. warp/tests/aux_test_unresolved_symbol.py +14 -0
  209. warp/tests/disabled_kinematics.py +239 -0
  210. warp/tests/run_coverage_serial.py +31 -0
  211. warp/tests/test_adam.py +103 -106
  212. warp/tests/test_arithmetic.py +128 -74
  213. warp/tests/test_array.py +1497 -211
  214. warp/tests/test_array_reduce.py +150 -0
  215. warp/tests/test_atomic.py +64 -28
  216. warp/tests/test_bool.py +99 -0
  217. warp/tests/test_builtins_resolution.py +1292 -0
  218. warp/tests/test_bvh.py +75 -43
  219. warp/tests/test_closest_point_edge_edge.py +54 -57
  220. warp/tests/test_codegen.py +233 -128
  221. warp/tests/test_compile_consts.py +28 -20
  222. warp/tests/test_conditional.py +108 -24
  223. warp/tests/test_copy.py +10 -12
  224. warp/tests/test_ctypes.py +112 -88
  225. warp/tests/test_dense.py +21 -14
  226. warp/tests/test_devices.py +98 -0
  227. warp/tests/test_dlpack.py +136 -108
  228. warp/tests/test_examples.py +277 -0
  229. warp/tests/test_fabricarray.py +955 -0
  230. warp/tests/test_fast_math.py +15 -11
  231. warp/tests/test_fem.py +1271 -0
  232. warp/tests/test_fp16.py +53 -19
  233. warp/tests/test_func.py +187 -74
  234. warp/tests/test_generics.py +194 -49
  235. warp/tests/test_grad.py +180 -116
  236. warp/tests/test_grad_customs.py +176 -0
  237. warp/tests/test_hash_grid.py +52 -37
  238. warp/tests/test_import.py +10 -23
  239. warp/tests/test_indexedarray.py +577 -24
  240. warp/tests/test_intersect.py +18 -9
  241. warp/tests/test_large.py +141 -0
  242. warp/tests/test_launch.py +251 -15
  243. warp/tests/test_lerp.py +64 -65
  244. warp/tests/test_linear_solvers.py +154 -0
  245. warp/tests/test_lvalue.py +493 -0
  246. warp/tests/test_marching_cubes.py +12 -13
  247. warp/tests/test_mat.py +508 -2778
  248. warp/tests/test_mat_lite.py +115 -0
  249. warp/tests/test_mat_scalar_ops.py +2889 -0
  250. warp/tests/test_math.py +103 -9
  251. warp/tests/test_matmul.py +305 -69
  252. warp/tests/test_matmul_lite.py +410 -0
  253. warp/tests/test_mesh.py +71 -14
  254. warp/tests/test_mesh_query_aabb.py +41 -25
  255. warp/tests/test_mesh_query_point.py +325 -34
  256. warp/tests/test_mesh_query_ray.py +39 -22
  257. warp/tests/test_mlp.py +30 -22
  258. warp/tests/test_model.py +92 -89
  259. warp/tests/test_modules_lite.py +39 -0
  260. warp/tests/test_multigpu.py +88 -114
  261. warp/tests/test_noise.py +12 -11
  262. warp/tests/test_operators.py +16 -20
  263. warp/tests/test_options.py +11 -11
  264. warp/tests/test_pinned.py +17 -18
  265. warp/tests/test_print.py +32 -11
  266. warp/tests/test_quat.py +275 -129
  267. warp/tests/test_rand.py +18 -16
  268. warp/tests/test_reload.py +38 -34
  269. warp/tests/test_rounding.py +50 -43
  270. warp/tests/test_runlength_encode.py +190 -0
  271. warp/tests/test_smoothstep.py +9 -11
  272. warp/tests/test_snippet.py +143 -0
  273. warp/tests/test_sparse.py +460 -0
  274. warp/tests/test_spatial.py +276 -243
  275. warp/tests/test_streams.py +110 -85
  276. warp/tests/test_struct.py +331 -85
  277. warp/tests/test_tape.py +39 -21
  278. warp/tests/test_torch.py +118 -89
  279. warp/tests/test_transient_module.py +12 -13
  280. warp/tests/test_types.py +614 -0
  281. warp/tests/test_utils.py +494 -0
  282. warp/tests/test_vec.py +354 -1987
  283. warp/tests/test_vec_lite.py +73 -0
  284. warp/tests/test_vec_scalar_ops.py +2099 -0
  285. warp/tests/test_volume.py +457 -293
  286. warp/tests/test_volume_write.py +124 -134
  287. warp/tests/unittest_serial.py +35 -0
  288. warp/tests/unittest_suites.py +341 -0
  289. warp/tests/unittest_utils.py +568 -0
  290. warp/tests/unused_test_misc.py +71 -0
  291. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  292. warp/thirdparty/appdirs.py +36 -45
  293. warp/thirdparty/unittest_parallel.py +549 -0
  294. warp/torch.py +72 -30
  295. warp/types.py +1744 -713
  296. warp/utils.py +360 -350
  297. warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
  298. warp_lang-0.11.0.dist-info/METADATA +238 -0
  299. warp_lang-0.11.0.dist-info/RECORD +332 -0
  300. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
  301. warp/bin/warp-clang.exp +0 -0
  302. warp/bin/warp-clang.lib +0 -0
  303. warp/bin/warp.exp +0 -0
  304. warp/bin/warp.lib +0 -0
  305. warp/tests/test_all.py +0 -215
  306. warp/tests/test_array_scan.py +0 -60
  307. warp/tests/test_base.py +0 -208
  308. warp/tests/test_unresolved_func.py +0 -7
  309. warp/tests/test_unresolved_symbol.py +0 -7
  310. warp_lang-0.9.0.dist-info/METADATA +0 -20
  311. warp_lang-0.9.0.dist-info/RECORD +0 -177
  312. warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  313. warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  314. warp/tests/{test_square.py → aux_test_square.py} +0 -0
  315. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/tests/test_vec.py CHANGED
@@ -5,9 +5,12 @@
5
5
  # distribution of this software and related documentation without an express
6
6
  # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
7
 
8
+ import unittest
9
+
8
10
  import numpy as np
11
+
9
12
  import warp as wp
10
- from warp.tests.test_base import *
13
+ from warp.tests.unittest_utils import *
11
14
 
12
15
  wp.init()
13
16
 
@@ -27,1573 +30,184 @@ np_unsigned_int_types = [
27
30
  np.ubyte,
28
31
  ]
29
32
 
30
- np_int_types = np_signed_int_types + np_unsigned_int_types
31
-
32
33
  np_float_types = [np.float16, np.float32, np.float64]
33
34
 
34
- np_scalar_types = np_int_types + np_float_types
35
-
36
35
 
37
- def randvals(shape, dtype):
36
+ def randvals(rng, shape, dtype):
38
37
  if dtype in np_float_types:
39
- return np.random.randn(*shape).astype(dtype)
38
+ return rng.standard_normal(size=shape).astype(dtype)
40
39
  elif dtype in [np.int8, np.uint8, np.byte, np.ubyte]:
41
- return np.random.randint(1, 3, size=shape, dtype=dtype)
42
- return np.random.randint(1, 5, size=shape, dtype=dtype)
40
+ return rng.integers(1, high=3, size=shape, dtype=dtype)
41
+ return rng.integers(1, high=5, size=shape, dtype=dtype)
43
42
 
44
43
 
45
44
  kernel_cache = dict()
46
45
 
47
46
 
48
47
  def getkernel(func, suffix=""):
49
- module = wp.get_module(func.__module__)
50
48
  key = func.__name__ + "_" + suffix
51
49
  if key not in kernel_cache:
52
- kernel_cache[key] = wp.Kernel(func=func, key=key, module=module)
50
+ kernel_cache[key] = wp.Kernel(func=func, key=key)
53
51
  return kernel_cache[key]
54
52
 
55
53
 
56
- def get_select_kernel(dtype):
57
- def output_select_kernel_fn(
58
- input: wp.array(dtype=dtype),
59
- index: int,
60
- out: wp.array(dtype=dtype),
61
- ):
62
- out[0] = input[index]
63
-
64
- return getkernel(output_select_kernel_fn, suffix=dtype.__name__)
65
-
66
-
67
- def get_select_kernel2(dtype):
68
- def output_select_kernel2_fn(
69
- input: wp.array(dtype=dtype, ndim=2),
70
- index0: int,
71
- index1: int,
72
- out: wp.array(dtype=dtype),
73
- ):
74
- out[0] = input[index0, index1]
75
-
76
- return getkernel(output_select_kernel2_fn, suffix=dtype.__name__)
77
-
78
-
79
- def test_arrays(test, device, dtype):
80
- np.random.seed(123)
81
-
82
- tol = {
83
- np.float16: 1.0e-3,
84
- np.float32: 1.0e-6,
85
- np.float64: 1.0e-8,
86
- }.get(dtype, 0)
87
-
88
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
89
- vec2 = wp.types.vector(length=2, dtype=wptype)
90
- vec3 = wp.types.vector(length=3, dtype=wptype)
91
- vec4 = wp.types.vector(length=4, dtype=wptype)
92
- vec5 = wp.types.vector(length=5, dtype=wptype)
93
-
94
- v2_np = randvals((10, 2), dtype)
95
- v3_np = randvals((10, 3), dtype)
96
- v4_np = randvals((10, 4), dtype)
97
- v5_np = randvals((10, 5), dtype)
98
-
99
- v2 = wp.array(v2_np, dtype=vec2, requires_grad=True, device=device)
100
- v3 = wp.array(v3_np, dtype=vec3, requires_grad=True, device=device)
101
- v4 = wp.array(v4_np, dtype=vec4, requires_grad=True, device=device)
102
- v5 = wp.array(v5_np, dtype=vec5, requires_grad=True, device=device)
103
-
104
- assert_np_equal(v2.numpy(), v2_np, tol=1.0e-6)
105
- assert_np_equal(v3.numpy(), v3_np, tol=1.0e-6)
106
- assert_np_equal(v4.numpy(), v4_np, tol=1.0e-6)
107
- assert_np_equal(v5.numpy(), v5_np, tol=1.0e-6)
108
-
109
- vec2 = wp.types.vector(length=2, dtype=wptype)
110
- vec3 = wp.types.vector(length=3, dtype=wptype)
111
- vec4 = wp.types.vector(length=4, dtype=wptype)
112
-
113
- v2 = wp.array(v2_np, dtype=vec2, requires_grad=True, device=device)
114
- v3 = wp.array(v3_np, dtype=vec3, requires_grad=True, device=device)
115
- v4 = wp.array(v4_np, dtype=vec4, requires_grad=True, device=device)
116
-
117
- assert_np_equal(v2.numpy(), v2_np, tol=1.0e-6)
118
- assert_np_equal(v3.numpy(), v3_np, tol=1.0e-6)
119
- assert_np_equal(v4.numpy(), v4_np, tol=1.0e-6)
120
-
121
-
122
- def test_anon_type_instance(test, device, dtype, register_kernels=False):
123
- np.random.seed(123)
124
-
125
- tol = {
126
- np.float16: 5.0e-3,
127
- np.float32: 1.0e-6,
128
- np.float64: 1.0e-8,
129
- }.get(dtype, 0)
130
-
131
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
54
+ def test_anon_constructor_error_dtype_keyword_missing(test, device):
55
+ @wp.kernel
56
+ def kernel():
57
+ wp.vector(length=123)
132
58
 
133
- def check_scalar_init(
134
- input: wp.array(dtype=wptype),
135
- output: wp.array(dtype=wptype),
136
- ):
137
- v2result = wp.vector(input[0], length=2)
138
- v3result = wp.vector(input[1], length=3)
139
- v4result = wp.vector(input[2], length=4)
140
- v5result = wp.vector(input[3], length=5)
141
-
142
- idx = 0
143
- for i in range(2):
144
- output[idx] = wptype(2) * v2result[i]
145
- idx = idx + 1
146
- for i in range(3):
147
- output[idx] = wptype(2) * v3result[i]
148
- idx = idx + 1
149
- for i in range(4):
150
- output[idx] = wptype(2) * v4result[i]
151
- idx = idx + 1
152
- for i in range(5):
153
- output[idx] = wptype(2) * v5result[i]
154
- idx = idx + 1
155
-
156
- def check_component_init(
157
- input: wp.array(dtype=wptype),
158
- output: wp.array(dtype=wptype),
59
+ with test.assertRaisesRegex(
60
+ RuntimeError,
61
+ r"vec\(\) must have dtype as a keyword argument if it has no positional arguments, e.g.: wp.vector\(length=5, dtype=wp.float32\)$",
159
62
  ):
160
- v2result = wp.vector(input[0], input[1])
161
- v3result = wp.vector(input[2], input[3], input[4])
162
- v4result = wp.vector(input[5], input[6], input[7], input[8])
163
- v5result = wp.vector(input[9], input[10], input[11], input[12], input[13])
164
-
165
- idx = 0
166
- for i in range(2):
167
- output[idx] = wptype(2) * v2result[i]
168
- idx = idx + 1
169
- for i in range(3):
170
- output[idx] = wptype(2) * v3result[i]
171
- idx = idx + 1
172
- for i in range(4):
173
- output[idx] = wptype(2) * v4result[i]
174
- idx = idx + 1
175
- for i in range(5):
176
- output[idx] = wptype(2) * v5result[i]
177
- idx = idx + 1
178
-
179
- scalar_kernel = getkernel(check_scalar_init, suffix=dtype.__name__)
180
- component_kernel = getkernel(check_component_init, suffix=dtype.__name__)
181
- output_select_kernel = get_select_kernel(wptype)
182
-
183
- if register_kernels:
184
- return
185
-
186
- input = wp.array(randvals([4], dtype), requires_grad=True, device=device)
187
- output = wp.zeros(2 + 3 + 4 + 5, dtype=wptype, requires_grad=True, device=device)
188
-
189
- wp.launch(scalar_kernel, dim=1, inputs=[input], outputs=[output], device=device)
190
-
191
- assert_np_equal(output.numpy()[:2], 2 * np.array([input.numpy()[0]] * 2), tol=1.0e-6)
192
- assert_np_equal(output.numpy()[2:5], 2 * np.array([input.numpy()[1]] * 3), tol=1.0e-6)
193
- assert_np_equal(output.numpy()[5:9], 2 * np.array([input.numpy()[2]] * 4), tol=1.0e-6)
194
- assert_np_equal(output.numpy()[9:], 2 * np.array([input.numpy()[3]] * 5), tol=1.0e-6)
195
-
196
- if dtype in np_float_types:
197
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
198
- for i in range(len(output)):
199
- tape = wp.Tape()
200
- with tape:
201
- wp.launch(scalar_kernel, dim=1, inputs=[input], outputs=[output], device=device)
202
- wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[out], device=device)
203
-
204
- tape.backward(loss=out)
205
- expected = np.zeros_like(input.numpy())
206
- if i < 2:
207
- expected[0] = 2
208
- elif i < 5:
209
- expected[1] = 2
210
- elif i < 9:
211
- expected[2] = 2
212
- else:
213
- expected[3] = 2
214
-
215
- assert_np_equal(tape.gradients[input].numpy(), expected, tol=tol)
216
-
217
- tape.reset()
218
- tape.zero()
219
-
220
- input = wp.array(randvals([2 + 3 + 4 + 5], dtype), requires_grad=True, device=device)
221
- output = wp.zeros(2 + 3 + 4 + 5, dtype=wptype, requires_grad=True, device=device)
222
-
223
- wp.launch(component_kernel, dim=1, inputs=[input], outputs=[output], device=device)
224
-
225
- assert_np_equal(output.numpy(), 2 * input.numpy(), tol=1.0e-6)
226
-
227
- if dtype in np_float_types:
228
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
229
- for i in range(len(output)):
230
- tape = wp.Tape()
231
- with tape:
232
- wp.launch(component_kernel, dim=1, inputs=[input], outputs=[output], device=device)
233
- wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[out], device=device)
234
-
235
- tape.backward(loss=out)
236
- expected = np.zeros_like(input.numpy())
237
- expected[i] = 2
238
-
239
- assert_np_equal(tape.gradients[input].numpy(), expected, tol=tol)
240
-
241
- tape.reset()
242
- tape.zero()
243
-
244
-
245
- def test_constants(test, device, dtype, register_kernels=False):
246
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
247
- vec2 = wp.types.vector(length=2, dtype=wptype)
248
- vec3 = wp.types.vector(length=3, dtype=wptype)
249
- vec4 = wp.types.vector(length=4, dtype=wptype)
250
- vec5 = wp.types.vector(length=5, dtype=wptype)
251
-
252
- cv2 = wp.constant(vec2(1, 2))
253
- cv3 = wp.constant(vec3(1, 2, 3))
254
- cv4 = wp.constant(vec4(1, 2, 3, 4))
255
- cv5 = wp.constant(vec5(1, 2, 3, 4, 5))
256
-
257
- def check_vector_constants():
258
- wp.expect_eq(cv2, vec2(wptype(1), wptype(2)))
259
- wp.expect_eq(cv3, vec3(wptype(1), wptype(2), wptype(3)))
260
- wp.expect_eq(cv4, vec4(wptype(1), wptype(2), wptype(3), wptype(4)))
261
- wp.expect_eq(cv5, vec5(wptype(1), wptype(2), wptype(3), wptype(4), wptype(5)))
262
-
263
- kernel = getkernel(check_vector_constants, suffix=dtype.__name__)
264
-
265
- if register_kernels:
266
- return
267
-
268
- wp.launch(kernel, dim=1, inputs=[])
269
-
270
-
271
- def test_constructors(test, device, dtype, register_kernels=False):
272
- np.random.seed(123)
63
+ wp.launch(
64
+ kernel,
65
+ dim=1,
66
+ inputs=[],
67
+ device=device,
68
+ )
273
69
 
274
- tol = {
275
- np.float16: 5.0e-3,
276
- np.float32: 1.0e-6,
277
- np.float64: 1.0e-8,
278
- }.get(dtype, 0)
279
70
 
280
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
281
- vec2 = wp.types.vector(length=2, dtype=wptype)
282
- vec3 = wp.types.vector(length=3, dtype=wptype)
283
- vec4 = wp.types.vector(length=4, dtype=wptype)
284
- vec5 = wp.types.vector(length=5, dtype=wptype)
71
+ def test_anon_constructor_error_length_mismatch(test, device):
72
+ @wp.kernel
73
+ def kernel():
74
+ wp.vector(
75
+ wp.vector(length=2, dtype=float),
76
+ length=3,
77
+ dtype=float,
78
+ )
285
79
 
286
- def check_scalar_constructor(
287
- input: wp.array(dtype=wptype),
288
- v2: wp.array(dtype=vec2),
289
- v3: wp.array(dtype=vec3),
290
- v4: wp.array(dtype=vec4),
291
- v5: wp.array(dtype=vec5),
292
- v20: wp.array(dtype=wptype),
293
- v21: wp.array(dtype=wptype),
294
- v30: wp.array(dtype=wptype),
295
- v31: wp.array(dtype=wptype),
296
- v32: wp.array(dtype=wptype),
297
- v40: wp.array(dtype=wptype),
298
- v41: wp.array(dtype=wptype),
299
- v42: wp.array(dtype=wptype),
300
- v43: wp.array(dtype=wptype),
301
- v50: wp.array(dtype=wptype),
302
- v51: wp.array(dtype=wptype),
303
- v52: wp.array(dtype=wptype),
304
- v53: wp.array(dtype=wptype),
305
- v54: wp.array(dtype=wptype),
80
+ with test.assertRaisesRegex(
81
+ RuntimeError,
82
+ r"Incompatible vector lengths for casting copy constructor, 3 vs 2$",
306
83
  ):
307
- v2result = vec2(input[0])
308
- v3result = vec3(input[0])
309
- v4result = vec4(input[0])
310
- v5result = vec5(input[0])
311
-
312
- v2[0] = v2result
313
- v3[0] = v3result
314
- v4[0] = v4result
315
- v5[0] = v5result
316
-
317
- # multiply outputs by 2 so we've got something to backpropagate
318
- v20[0] = wptype(2) * v2result[0]
319
- v21[0] = wptype(2) * v2result[1]
320
-
321
- v30[0] = wptype(2) * v3result[0]
322
- v31[0] = wptype(2) * v3result[1]
323
- v32[0] = wptype(2) * v3result[2]
84
+ wp.launch(
85
+ kernel,
86
+ dim=1,
87
+ inputs=[],
88
+ device=device,
89
+ )
324
90
 
325
- v40[0] = wptype(2) * v4result[0]
326
- v41[0] = wptype(2) * v4result[1]
327
- v42[0] = wptype(2) * v4result[2]
328
- v43[0] = wptype(2) * v4result[3]
329
91
 
330
- v50[0] = wptype(2) * v5result[0]
331
- v51[0] = wptype(2) * v5result[1]
332
- v52[0] = wptype(2) * v5result[2]
333
- v53[0] = wptype(2) * v5result[3]
334
- v54[0] = wptype(2) * v5result[4]
92
+ def test_anon_constructor_error_numeric_arg_missing_1(test, device):
93
+ @wp.kernel
94
+ def kernel():
95
+ wp.vector(1.0, 2.0, length=12345)
335
96
 
336
- def check_vector_constructors(
337
- input: wp.array(dtype=wptype),
338
- v2: wp.array(dtype=vec2),
339
- v3: wp.array(dtype=vec3),
340
- v4: wp.array(dtype=vec4),
341
- v5: wp.array(dtype=vec5),
342
- v20: wp.array(dtype=wptype),
343
- v21: wp.array(dtype=wptype),
344
- v30: wp.array(dtype=wptype),
345
- v31: wp.array(dtype=wptype),
346
- v32: wp.array(dtype=wptype),
347
- v40: wp.array(dtype=wptype),
348
- v41: wp.array(dtype=wptype),
349
- v42: wp.array(dtype=wptype),
350
- v43: wp.array(dtype=wptype),
351
- v50: wp.array(dtype=wptype),
352
- v51: wp.array(dtype=wptype),
353
- v52: wp.array(dtype=wptype),
354
- v53: wp.array(dtype=wptype),
355
- v54: wp.array(dtype=wptype),
97
+ with test.assertRaisesRegex(
98
+ RuntimeError,
99
+ r"vec\(\) must have one scalar argument or the dtype keyword argument if the length keyword argument is specified, e.g.: wp.vec\(1.0, length=5\)$",
356
100
  ):
357
- v2result = vec2(input[0], input[1])
358
- v3result = vec3(input[2], input[3], input[4])
359
- v4result = vec4(input[5], input[6], input[7], input[8])
360
- v5result = vec5(input[9], input[10], input[11], input[12], input[13])
361
-
362
- v2[0] = v2result
363
- v3[0] = v3result
364
- v4[0] = v4result
365
- v5[0] = v5result
366
-
367
- # multiply the output by 2 so we've got something to backpropagate:
368
- v20[0] = wptype(2) * v2result[0]
369
- v21[0] = wptype(2) * v2result[1]
370
-
371
- v30[0] = wptype(2) * v3result[0]
372
- v31[0] = wptype(2) * v3result[1]
373
- v32[0] = wptype(2) * v3result[2]
374
-
375
- v40[0] = wptype(2) * v4result[0]
376
- v41[0] = wptype(2) * v4result[1]
377
- v42[0] = wptype(2) * v4result[2]
378
- v43[0] = wptype(2) * v4result[3]
379
-
380
- v50[0] = wptype(2) * v5result[0]
381
- v51[0] = wptype(2) * v5result[1]
382
- v52[0] = wptype(2) * v5result[2]
383
- v53[0] = wptype(2) * v5result[3]
384
- v54[0] = wptype(2) * v5result[4]
385
-
386
- vec_kernel = getkernel(check_vector_constructors, suffix=dtype.__name__)
387
- kernel = getkernel(check_scalar_constructor, suffix=dtype.__name__)
388
-
389
- if register_kernels:
390
- return
391
-
392
- input = wp.array(randvals([1], dtype), requires_grad=True, device=device)
393
- v2 = wp.zeros(1, dtype=vec2, device=device)
394
- v3 = wp.zeros(1, dtype=vec3, device=device)
395
- v4 = wp.zeros(1, dtype=vec4, device=device)
396
- v5 = wp.zeros(1, dtype=vec5, device=device)
397
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
398
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
399
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
400
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
401
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
402
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
403
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
404
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
405
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
406
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
407
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
408
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
409
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
410
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
411
-
412
- tape = wp.Tape()
413
- with tape:
414
101
  wp.launch(
415
102
  kernel,
416
103
  dim=1,
417
- inputs=[input],
418
- outputs=[v2, v3, v4, v5, v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
104
+ inputs=[],
419
105
  device=device,
420
106
  )
421
107
 
422
- if dtype in np_float_types:
423
- for l in [v20, v21]:
424
- tape.backward(loss=l)
425
- test.assertEqual(tape.gradients[input].numpy()[0], 2.0)
426
- tape.zero()
427
-
428
- for l in [v30, v31, v32]:
429
- tape.backward(loss=l)
430
- test.assertEqual(tape.gradients[input].numpy()[0], 2.0)
431
- tape.zero()
432
-
433
- for l in [v40, v41, v42, v43]:
434
- tape.backward(loss=l)
435
- test.assertEqual(tape.gradients[input].numpy()[0], 2.0)
436
- tape.zero()
437
108
 
438
- for l in [v50, v51, v52, v53, v54]:
439
- tape.backward(loss=l)
440
- test.assertEqual(tape.gradients[input].numpy()[0], 2.0)
441
- tape.zero()
109
+ def test_anon_constructor_error_numeric_arg_missing_2(test, device):
110
+ @wp.kernel
111
+ def kernel():
112
+ wp.vector()
442
113
 
443
- val = input.numpy()[0]
444
- assert_np_equal(v2.numpy()[0], np.array([val, val]), tol=1.0e-6)
445
- assert_np_equal(v3.numpy()[0], np.array([val, val, val]), tol=1.0e-6)
446
- assert_np_equal(v4.numpy()[0], np.array([val, val, val, val]), tol=1.0e-6)
447
- assert_np_equal(v5.numpy()[0], np.array([val, val, val, val, val]), tol=1.0e-6)
448
-
449
- assert_np_equal(v20.numpy()[0], 2 * val, tol=1.0e-6)
450
- assert_np_equal(v21.numpy()[0], 2 * val, tol=1.0e-6)
451
- assert_np_equal(v30.numpy()[0], 2 * val, tol=1.0e-6)
452
- assert_np_equal(v31.numpy()[0], 2 * val, tol=1.0e-6)
453
- assert_np_equal(v32.numpy()[0], 2 * val, tol=1.0e-6)
454
- assert_np_equal(v40.numpy()[0], 2 * val, tol=1.0e-6)
455
- assert_np_equal(v41.numpy()[0], 2 * val, tol=1.0e-6)
456
- assert_np_equal(v42.numpy()[0], 2 * val, tol=1.0e-6)
457
- assert_np_equal(v43.numpy()[0], 2 * val, tol=1.0e-6)
458
- assert_np_equal(v50.numpy()[0], 2 * val, tol=1.0e-6)
459
- assert_np_equal(v51.numpy()[0], 2 * val, tol=1.0e-6)
460
- assert_np_equal(v52.numpy()[0], 2 * val, tol=1.0e-6)
461
- assert_np_equal(v53.numpy()[0], 2 * val, tol=1.0e-6)
462
- assert_np_equal(v54.numpy()[0], 2 * val, tol=1.0e-6)
463
-
464
- input = wp.array(randvals([14], dtype), requires_grad=True, device=device)
465
- tape = wp.Tape()
466
- with tape:
114
+ with test.assertRaisesRegex(
115
+ RuntimeError,
116
+ r"vec\(\) must have at least one numeric argument, if it's length, dtype is not specified$",
117
+ ):
467
118
  wp.launch(
468
- vec_kernel,
119
+ kernel,
469
120
  dim=1,
470
- inputs=[input],
471
- outputs=[v2, v3, v4, v5, v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
121
+ inputs=[],
472
122
  device=device,
473
123
  )
474
124
 
475
- if dtype in np_float_types:
476
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
477
- tape.backward(loss=l)
478
- grad = tape.gradients[input].numpy()
479
- expected_grad = np.zeros_like(grad)
480
- expected_grad[i] = 2
481
- assert_np_equal(grad, expected_grad, tol=tol)
482
- tape.zero()
483
-
484
- assert_np_equal(v2.numpy()[0, 0], input.numpy()[0], tol=tol)
485
- assert_np_equal(v2.numpy()[0, 1], input.numpy()[1], tol=tol)
486
- assert_np_equal(v3.numpy()[0, 0], input.numpy()[2], tol=tol)
487
- assert_np_equal(v3.numpy()[0, 1], input.numpy()[3], tol=tol)
488
- assert_np_equal(v3.numpy()[0, 2], input.numpy()[4], tol=tol)
489
- assert_np_equal(v4.numpy()[0, 0], input.numpy()[5], tol=tol)
490
- assert_np_equal(v4.numpy()[0, 1], input.numpy()[6], tol=tol)
491
- assert_np_equal(v4.numpy()[0, 2], input.numpy()[7], tol=tol)
492
- assert_np_equal(v4.numpy()[0, 3], input.numpy()[8], tol=tol)
493
- assert_np_equal(v5.numpy()[0, 0], input.numpy()[9], tol=tol)
494
- assert_np_equal(v5.numpy()[0, 1], input.numpy()[10], tol=tol)
495
- assert_np_equal(v5.numpy()[0, 2], input.numpy()[11], tol=tol)
496
- assert_np_equal(v5.numpy()[0, 3], input.numpy()[12], tol=tol)
497
- assert_np_equal(v5.numpy()[0, 4], input.numpy()[13], tol=tol)
498
-
499
- assert_np_equal(v20.numpy()[0], 2 * input.numpy()[0], tol=tol)
500
- assert_np_equal(v21.numpy()[0], 2 * input.numpy()[1], tol=tol)
501
- assert_np_equal(v30.numpy()[0], 2 * input.numpy()[2], tol=tol)
502
- assert_np_equal(v31.numpy()[0], 2 * input.numpy()[3], tol=tol)
503
- assert_np_equal(v32.numpy()[0], 2 * input.numpy()[4], tol=tol)
504
- assert_np_equal(v40.numpy()[0], 2 * input.numpy()[5], tol=tol)
505
- assert_np_equal(v41.numpy()[0], 2 * input.numpy()[6], tol=tol)
506
- assert_np_equal(v42.numpy()[0], 2 * input.numpy()[7], tol=tol)
507
- assert_np_equal(v43.numpy()[0], 2 * input.numpy()[8], tol=tol)
508
- assert_np_equal(v50.numpy()[0], 2 * input.numpy()[9], tol=tol)
509
- assert_np_equal(v51.numpy()[0], 2 * input.numpy()[10], tol=tol)
510
- assert_np_equal(v52.numpy()[0], 2 * input.numpy()[11], tol=tol)
511
- assert_np_equal(v53.numpy()[0], 2 * input.numpy()[12], tol=tol)
512
- assert_np_equal(v54.numpy()[0], 2 * input.numpy()[13], tol=tol)
513
-
514
-
515
- def test_indexing(test, device, dtype, register_kernels=False):
516
- np.random.seed(123)
517
-
518
- tol = {
519
- np.float16: 5.0e-3,
520
- np.float32: 1.0e-6,
521
- np.float64: 1.0e-8,
522
- }.get(dtype, 0)
523
125
 
524
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
525
- vec2 = wp.types.vector(length=2, dtype=wptype)
526
- vec3 = wp.types.vector(length=3, dtype=wptype)
527
- vec4 = wp.types.vector(length=4, dtype=wptype)
528
- vec5 = wp.types.vector(length=5, dtype=wptype)
126
+ def test_anon_constructor_error_dtype_keyword_extraneous(test, device):
127
+ @wp.kernel
128
+ def kernel():
129
+ wp.vector(1.0, 2.0, 3.0, dtype=float)
529
130
 
530
- def check_indexing(
531
- v2: wp.array(dtype=vec2),
532
- v3: wp.array(dtype=vec3),
533
- v4: wp.array(dtype=vec4),
534
- v5: wp.array(dtype=vec5),
535
- v20: wp.array(dtype=wptype),
536
- v21: wp.array(dtype=wptype),
537
- v30: wp.array(dtype=wptype),
538
- v31: wp.array(dtype=wptype),
539
- v32: wp.array(dtype=wptype),
540
- v40: wp.array(dtype=wptype),
541
- v41: wp.array(dtype=wptype),
542
- v42: wp.array(dtype=wptype),
543
- v43: wp.array(dtype=wptype),
544
- v50: wp.array(dtype=wptype),
545
- v51: wp.array(dtype=wptype),
546
- v52: wp.array(dtype=wptype),
547
- v53: wp.array(dtype=wptype),
548
- v54: wp.array(dtype=wptype),
131
+ with test.assertRaisesRegex(
132
+ RuntimeError,
133
+ r"vec\(\) should not have dtype specified if numeric arguments are given, the dtype will be inferred from the argument types$",
549
134
  ):
550
- # multiply outputs by 2 so we've got something to backpropagate:
551
- v20[0] = wptype(2) * v2[0][0]
552
- v21[0] = wptype(2) * v2[0][1]
553
-
554
- v30[0] = wptype(2) * v3[0][0]
555
- v31[0] = wptype(2) * v3[0][1]
556
- v32[0] = wptype(2) * v3[0][2]
557
-
558
- v40[0] = wptype(2) * v4[0][0]
559
- v41[0] = wptype(2) * v4[0][1]
560
- v42[0] = wptype(2) * v4[0][2]
561
- v43[0] = wptype(2) * v4[0][3]
562
-
563
- v50[0] = wptype(2) * v5[0][0]
564
- v51[0] = wptype(2) * v5[0][1]
565
- v52[0] = wptype(2) * v5[0][2]
566
- v53[0] = wptype(2) * v5[0][3]
567
- v54[0] = wptype(2) * v5[0][4]
568
-
569
- kernel = getkernel(check_indexing, suffix=dtype.__name__)
570
-
571
- if register_kernels:
572
- return
573
-
574
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
575
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
576
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
577
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
578
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
579
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
580
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
581
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
582
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
583
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
584
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
585
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
586
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
587
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
588
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
589
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
590
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
591
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
592
-
593
- tape = wp.Tape()
594
- with tape:
595
- wp.launch(
596
- kernel,
597
- dim=1,
598
- inputs=[v2, v3, v4, v5],
599
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
600
- device=device,
601
- )
602
-
603
- if dtype in np_float_types:
604
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
605
- tape.backward(loss=l)
606
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
607
- expected_grads = np.zeros_like(allgrads)
608
- expected_grads[i] = 2
609
- assert_np_equal(allgrads, expected_grads, tol=tol)
610
- tape.zero()
611
-
612
- assert_np_equal(v20.numpy()[0], 2.0 * v2.numpy()[0, 0], tol=tol)
613
- assert_np_equal(v21.numpy()[0], 2.0 * v2.numpy()[0, 1], tol=tol)
614
- assert_np_equal(v30.numpy()[0], 2.0 * v3.numpy()[0, 0], tol=tol)
615
- assert_np_equal(v31.numpy()[0], 2.0 * v3.numpy()[0, 1], tol=tol)
616
- assert_np_equal(v32.numpy()[0], 2.0 * v3.numpy()[0, 2], tol=tol)
617
- assert_np_equal(v40.numpy()[0], 2.0 * v4.numpy()[0, 0], tol=tol)
618
- assert_np_equal(v41.numpy()[0], 2.0 * v4.numpy()[0, 1], tol=tol)
619
- assert_np_equal(v42.numpy()[0], 2.0 * v4.numpy()[0, 2], tol=tol)
620
- assert_np_equal(v43.numpy()[0], 2.0 * v4.numpy()[0, 3], tol=tol)
621
- assert_np_equal(v50.numpy()[0], 2.0 * v5.numpy()[0, 0], tol=tol)
622
- assert_np_equal(v51.numpy()[0], 2.0 * v5.numpy()[0, 1], tol=tol)
623
- assert_np_equal(v52.numpy()[0], 2.0 * v5.numpy()[0, 2], tol=tol)
624
- assert_np_equal(v53.numpy()[0], 2.0 * v5.numpy()[0, 3], tol=tol)
625
- assert_np_equal(v54.numpy()[0], 2.0 * v5.numpy()[0, 4], tol=tol)
626
-
627
-
628
- def test_equality(test, device, dtype, register_kernels=False):
629
- np.random.seed(123)
630
-
631
- tol = {
632
- np.float16: 1.0e-3,
633
- np.float32: 1.0e-6,
634
- np.float64: 1.0e-8,
635
- }.get(dtype, 0)
636
-
637
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
638
- vec2 = wp.types.vector(length=2, dtype=wptype)
639
- vec3 = wp.types.vector(length=3, dtype=wptype)
640
- vec4 = wp.types.vector(length=4, dtype=wptype)
641
- vec5 = wp.types.vector(length=5, dtype=wptype)
642
-
643
- def check_equality(
644
- v20: wp.array(dtype=vec2),
645
- v21: wp.array(dtype=vec2),
646
- v22: wp.array(dtype=vec2),
647
- v30: wp.array(dtype=vec3),
648
- v31: wp.array(dtype=vec3),
649
- v32: wp.array(dtype=vec3),
650
- v33: wp.array(dtype=vec3),
651
- v40: wp.array(dtype=vec4),
652
- v41: wp.array(dtype=vec4),
653
- v42: wp.array(dtype=vec4),
654
- v43: wp.array(dtype=vec4),
655
- v44: wp.array(dtype=vec4),
656
- v50: wp.array(dtype=vec5),
657
- v51: wp.array(dtype=vec5),
658
- v52: wp.array(dtype=vec5),
659
- v53: wp.array(dtype=vec5),
660
- v54: wp.array(dtype=vec5),
661
- v55: wp.array(dtype=vec5),
662
- ):
663
- wp.expect_eq(v20[0], v20[0])
664
- wp.expect_neq(v21[0], v20[0])
665
- wp.expect_neq(v22[0], v20[0])
666
-
667
- wp.expect_eq(v30[0], v30[0])
668
- wp.expect_neq(v31[0], v30[0])
669
- wp.expect_neq(v32[0], v30[0])
670
- wp.expect_neq(v33[0], v30[0])
671
-
672
- wp.expect_eq(v40[0], v40[0])
673
- wp.expect_neq(v41[0], v40[0])
674
- wp.expect_neq(v42[0], v40[0])
675
- wp.expect_neq(v43[0], v40[0])
676
- wp.expect_neq(v44[0], v40[0])
677
-
678
- wp.expect_eq(v50[0], v50[0])
679
- wp.expect_neq(v51[0], v50[0])
680
- wp.expect_neq(v52[0], v50[0])
681
- wp.expect_neq(v53[0], v50[0])
682
- wp.expect_neq(v54[0], v50[0])
683
- wp.expect_neq(v55[0], v50[0])
684
-
685
- kernel = getkernel(check_equality, suffix=dtype.__name__)
686
-
687
- if register_kernels:
688
- return
689
-
690
- v20 = wp.array([1.0, 2.0], dtype=vec2, requires_grad=True, device=device)
691
- v21 = wp.array([1.0, 3.0], dtype=vec2, requires_grad=True, device=device)
692
- v22 = wp.array([3.0, 2.0], dtype=vec2, requires_grad=True, device=device)
693
-
694
- v30 = wp.array([1.0, 2.0, 3.0], dtype=vec3, requires_grad=True, device=device)
695
- v31 = wp.array([-1.0, 2.0, 3.0], dtype=vec3, requires_grad=True, device=device)
696
- v32 = wp.array([1.0, -2.0, 3.0], dtype=vec3, requires_grad=True, device=device)
697
- v33 = wp.array([1.0, 2.0, -3.0], dtype=vec3, requires_grad=True, device=device)
698
-
699
- v40 = wp.array([1.0, 2.0, 3.0, 4.0], dtype=vec4, requires_grad=True, device=device)
700
- v41 = wp.array([-1.0, 2.0, 3.0, 4.0], dtype=vec4, requires_grad=True, device=device)
701
- v42 = wp.array([1.0, -2.0, 3.0, 4.0], dtype=vec4, requires_grad=True, device=device)
702
- v43 = wp.array([1.0, 2.0, -3.0, 4.0], dtype=vec4, requires_grad=True, device=device)
703
- v44 = wp.array([1.0, 2.0, 3.0, -4.0], dtype=vec4, requires_grad=True, device=device)
704
-
705
- v50 = wp.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype=vec5, requires_grad=True, device=device)
706
- v51 = wp.array([-1.0, 2.0, 3.0, 4.0, 5.0], dtype=vec5, requires_grad=True, device=device)
707
- v52 = wp.array([1.0, -2.0, 3.0, 4.0, 5.0], dtype=vec5, requires_grad=True, device=device)
708
- v53 = wp.array([1.0, 2.0, -3.0, 4.0, 5.0], dtype=vec5, requires_grad=True, device=device)
709
- v54 = wp.array([1.0, 2.0, 3.0, -4.0, 5.0], dtype=vec5, requires_grad=True, device=device)
710
- v55 = wp.array([1.0, 2.0, 3.0, 4.0, -5.0], dtype=vec5, requires_grad=True, device=device)
711
- wp.launch(
712
- kernel,
713
- dim=1,
714
- inputs=[
715
- v20,
716
- v21,
717
- v22,
718
- v30,
719
- v31,
720
- v32,
721
- v33,
722
- v40,
723
- v41,
724
- v42,
725
- v43,
726
- v44,
727
- v50,
728
- v51,
729
- v52,
730
- v53,
731
- v54,
732
- v55,
733
- ],
734
- outputs=[],
735
- device=device,
736
- )
737
-
738
-
739
- def test_negation(test, device, dtype, register_kernels=False):
740
- np.random.seed(123)
741
-
742
- tol = {
743
- np.float16: 5.0e-3,
744
- np.float32: 1.0e-6,
745
- np.float64: 1.0e-8,
746
- }.get(dtype, 0)
747
-
748
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
749
- vec2 = wp.types.vector(length=2, dtype=wptype)
750
- vec3 = wp.types.vector(length=3, dtype=wptype)
751
- vec4 = wp.types.vector(length=4, dtype=wptype)
752
- vec5 = wp.types.vector(length=5, dtype=wptype)
753
-
754
- def check_negation(
755
- v2: wp.array(dtype=vec2),
756
- v3: wp.array(dtype=vec3),
757
- v4: wp.array(dtype=vec4),
758
- v5: wp.array(dtype=vec5),
759
- v2out: wp.array(dtype=vec2),
760
- v3out: wp.array(dtype=vec3),
761
- v4out: wp.array(dtype=vec4),
762
- v5out: wp.array(dtype=vec5),
763
- v20: wp.array(dtype=wptype),
764
- v21: wp.array(dtype=wptype),
765
- v30: wp.array(dtype=wptype),
766
- v31: wp.array(dtype=wptype),
767
- v32: wp.array(dtype=wptype),
768
- v40: wp.array(dtype=wptype),
769
- v41: wp.array(dtype=wptype),
770
- v42: wp.array(dtype=wptype),
771
- v43: wp.array(dtype=wptype),
772
- v50: wp.array(dtype=wptype),
773
- v51: wp.array(dtype=wptype),
774
- v52: wp.array(dtype=wptype),
775
- v53: wp.array(dtype=wptype),
776
- v54: wp.array(dtype=wptype),
777
- ):
778
- v2result = -v2[0]
779
- v3result = -v3[0]
780
- v4result = -v4[0]
781
- v5result = -v5[0]
782
-
783
- v2out[0] = v2result
784
- v3out[0] = v3result
785
- v4out[0] = v4result
786
- v5out[0] = v5result
787
-
788
- # multiply these outputs by 2 so we've got something to backpropagate:
789
- v20[0] = wptype(2) * v2result[0]
790
- v21[0] = wptype(2) * v2result[1]
791
-
792
- v30[0] = wptype(2) * v3result[0]
793
- v31[0] = wptype(2) * v3result[1]
794
- v32[0] = wptype(2) * v3result[2]
795
-
796
- v40[0] = wptype(2) * v4result[0]
797
- v41[0] = wptype(2) * v4result[1]
798
- v42[0] = wptype(2) * v4result[2]
799
- v43[0] = wptype(2) * v4result[3]
800
-
801
- v50[0] = wptype(2) * v5result[0]
802
- v51[0] = wptype(2) * v5result[1]
803
- v52[0] = wptype(2) * v5result[2]
804
- v53[0] = wptype(2) * v5result[3]
805
- v54[0] = wptype(2) * v5result[4]
806
-
807
- kernel = getkernel(check_negation, suffix=dtype.__name__)
808
-
809
- if register_kernels:
810
- return
811
-
812
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
813
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
814
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
815
- v5_np = randvals((1, 5), dtype)
816
- v5 = wp.array(v5_np, dtype=vec5, requires_grad=True, device=device)
817
-
818
- v2out = wp.zeros(1, dtype=vec2, device=device)
819
- v3out = wp.zeros(1, dtype=vec3, device=device)
820
- v4out = wp.zeros(1, dtype=vec4, device=device)
821
- v5out = wp.zeros(1, dtype=vec5, device=device)
822
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
823
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
824
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
825
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
826
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
827
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
828
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
829
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
830
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
831
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
832
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
833
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
834
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
835
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
836
-
837
- tape = wp.Tape()
838
- with tape:
839
- wp.launch(
840
- kernel,
841
- dim=1,
842
- inputs=[v2, v3, v4, v5],
843
- outputs=[v2out, v3out, v4out, v5out, v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
844
- device=device,
845
- )
846
-
847
- if dtype in np_float_types:
848
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
849
- tape.backward(loss=l)
850
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
851
- expected_grads = np.zeros_like(allgrads)
852
- expected_grads[i] = -2
853
- assert_np_equal(allgrads, expected_grads, tol=tol)
854
- tape.zero()
855
-
856
- assert_np_equal(v2out.numpy()[0], -v2.numpy()[0], tol=tol)
857
- assert_np_equal(v3out.numpy()[0], -v3.numpy()[0], tol=tol)
858
- assert_np_equal(v4out.numpy()[0], -v4.numpy()[0], tol=tol)
859
- assert_np_equal(v5out.numpy()[0], -v5.numpy()[0], tol=tol)
860
-
861
-
862
- def test_scalar_multiplication(test, device, dtype, register_kernels=False):
863
- np.random.seed(123)
864
-
865
- tol = {
866
- np.float16: 5.0e-3,
867
- np.float32: 1.0e-6,
868
- np.float64: 1.0e-8,
869
- }.get(dtype, 0)
870
-
871
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
872
- vec2 = wp.types.vector(length=2, dtype=wptype)
873
- vec3 = wp.types.vector(length=3, dtype=wptype)
874
- vec4 = wp.types.vector(length=4, dtype=wptype)
875
- vec5 = wp.types.vector(length=5, dtype=wptype)
876
-
877
- def check_mul(
878
- s: wp.array(dtype=wptype),
879
- v2: wp.array(dtype=vec2),
880
- v3: wp.array(dtype=vec3),
881
- v4: wp.array(dtype=vec4),
882
- v5: wp.array(dtype=vec5),
883
- v20: wp.array(dtype=wptype),
884
- v21: wp.array(dtype=wptype),
885
- v30: wp.array(dtype=wptype),
886
- v31: wp.array(dtype=wptype),
887
- v32: wp.array(dtype=wptype),
888
- v40: wp.array(dtype=wptype),
889
- v41: wp.array(dtype=wptype),
890
- v42: wp.array(dtype=wptype),
891
- v43: wp.array(dtype=wptype),
892
- v50: wp.array(dtype=wptype),
893
- v51: wp.array(dtype=wptype),
894
- v52: wp.array(dtype=wptype),
895
- v53: wp.array(dtype=wptype),
896
- v54: wp.array(dtype=wptype),
897
- ):
898
- v2result = s[0] * v2[0]
899
- v3result = s[0] * v3[0]
900
- v4result = s[0] * v4[0]
901
- v5result = s[0] * v5[0]
902
-
903
- # multiply outputs by 2 so we've got something to backpropagate:
904
- v20[0] = wptype(2) * v2result[0]
905
- v21[0] = wptype(2) * v2result[1]
906
-
907
- v30[0] = wptype(2) * v3result[0]
908
- v31[0] = wptype(2) * v3result[1]
909
- v32[0] = wptype(2) * v3result[2]
910
-
911
- v40[0] = wptype(2) * v4result[0]
912
- v41[0] = wptype(2) * v4result[1]
913
- v42[0] = wptype(2) * v4result[2]
914
- v43[0] = wptype(2) * v4result[3]
915
-
916
- v50[0] = wptype(2) * v5result[0]
917
- v51[0] = wptype(2) * v5result[1]
918
- v52[0] = wptype(2) * v5result[2]
919
- v53[0] = wptype(2) * v5result[3]
920
- v54[0] = wptype(2) * v5result[4]
921
-
922
- kernel = getkernel(check_mul, suffix=dtype.__name__)
923
-
924
- if register_kernels:
925
- return
926
-
927
- s = wp.array(randvals([1], dtype), requires_grad=True, device=device)
928
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
929
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
930
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
931
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
932
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
933
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
934
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
935
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
936
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
937
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
938
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
939
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
940
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
941
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
942
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
943
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
944
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
945
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
946
- tape = wp.Tape()
947
- with tape:
948
- wp.launch(
949
- kernel,
950
- dim=1,
951
- inputs=[
952
- s,
953
- v2,
954
- v3,
955
- v4,
956
- v5,
957
- ],
958
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
959
- device=device,
960
- )
961
-
962
- assert_np_equal(v20.numpy()[0], 2 * s.numpy()[0] * v2.numpy()[0, 0], tol=tol)
963
- assert_np_equal(v21.numpy()[0], 2 * s.numpy()[0] * v2.numpy()[0, 1], tol=tol)
964
-
965
- assert_np_equal(v30.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 0], tol=10 * tol)
966
- assert_np_equal(v31.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 1], tol=10 * tol)
967
- assert_np_equal(v32.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 2], tol=10 * tol)
968
-
969
- assert_np_equal(v40.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 0], tol=10 * tol)
970
- assert_np_equal(v41.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 1], tol=10 * tol)
971
- assert_np_equal(v42.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 2], tol=10 * tol)
972
- assert_np_equal(v43.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 3], tol=10 * tol)
973
-
974
- assert_np_equal(v50.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 0], tol=10 * tol)
975
- assert_np_equal(v51.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 1], tol=10 * tol)
976
- assert_np_equal(v52.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 2], tol=10 * tol)
977
- assert_np_equal(v53.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 3], tol=10 * tol)
978
- assert_np_equal(v54.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 4], tol=10 * tol)
979
-
980
- incmps = np.concatenate([v.numpy()[0] for v in [v2, v3, v4, v5]])
981
-
982
- if dtype in np_float_types:
983
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43]):
984
- tape.backward(loss=l)
985
- sgrad = tape.gradients[s].numpy()[0]
986
- assert_np_equal(sgrad, 2 * incmps[i], tol=10 * tol)
987
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4]])
988
- expected_grads = np.zeros_like(allgrads)
989
- expected_grads[i] = s.numpy()[0] * 2
990
- assert_np_equal(allgrads, expected_grads, tol=10 * tol)
991
- tape.zero()
992
-
993
-
994
- def test_scalar_multiplication_rightmul(test, device, dtype, register_kernels=False):
995
- np.random.seed(123)
996
-
997
- tol = {
998
- np.float16: 5.0e-3,
999
- np.float32: 1.0e-6,
1000
- np.float64: 1.0e-8,
1001
- }.get(dtype, 0)
1002
-
1003
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1004
- vec2 = wp.types.vector(length=2, dtype=wptype)
1005
- vec3 = wp.types.vector(length=3, dtype=wptype)
1006
- vec4 = wp.types.vector(length=4, dtype=wptype)
1007
- vec5 = wp.types.vector(length=5, dtype=wptype)
1008
-
1009
- def check_rightmul(
1010
- s: wp.array(dtype=wptype),
1011
- v2: wp.array(dtype=vec2),
1012
- v3: wp.array(dtype=vec3),
1013
- v4: wp.array(dtype=vec4),
1014
- v5: wp.array(dtype=vec5),
1015
- v20: wp.array(dtype=wptype),
1016
- v21: wp.array(dtype=wptype),
1017
- v30: wp.array(dtype=wptype),
1018
- v31: wp.array(dtype=wptype),
1019
- v32: wp.array(dtype=wptype),
1020
- v40: wp.array(dtype=wptype),
1021
- v41: wp.array(dtype=wptype),
1022
- v42: wp.array(dtype=wptype),
1023
- v43: wp.array(dtype=wptype),
1024
- v50: wp.array(dtype=wptype),
1025
- v51: wp.array(dtype=wptype),
1026
- v52: wp.array(dtype=wptype),
1027
- v53: wp.array(dtype=wptype),
1028
- v54: wp.array(dtype=wptype),
1029
- ):
1030
- v2result = v2[0] * s[0]
1031
- v3result = v3[0] * s[0]
1032
- v4result = v4[0] * s[0]
1033
- v5result = v5[0] * s[0]
1034
-
1035
- # multiply outputs by 2 so we've got something to backpropagate:
1036
- v20[0] = wptype(2) * v2result[0]
1037
- v21[0] = wptype(2) * v2result[1]
1038
-
1039
- v30[0] = wptype(2) * v3result[0]
1040
- v31[0] = wptype(2) * v3result[1]
1041
- v32[0] = wptype(2) * v3result[2]
1042
-
1043
- v40[0] = wptype(2) * v4result[0]
1044
- v41[0] = wptype(2) * v4result[1]
1045
- v42[0] = wptype(2) * v4result[2]
1046
- v43[0] = wptype(2) * v4result[3]
1047
-
1048
- v50[0] = wptype(2) * v5result[0]
1049
- v51[0] = wptype(2) * v5result[1]
1050
- v52[0] = wptype(2) * v5result[2]
1051
- v53[0] = wptype(2) * v5result[3]
1052
- v54[0] = wptype(2) * v5result[4]
1053
-
1054
- kernel = getkernel(check_rightmul, suffix=dtype.__name__)
1055
-
1056
- if register_kernels:
1057
- return
1058
-
1059
- s = wp.array(randvals([1], dtype), requires_grad=True, device=device)
1060
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1061
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1062
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1063
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1064
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1065
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1066
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1067
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1068
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1069
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1070
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1071
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1072
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1073
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1074
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1075
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1076
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1077
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1078
- tape = wp.Tape()
1079
- with tape:
1080
- wp.launch(
1081
- kernel,
1082
- dim=1,
1083
- inputs=[
1084
- s,
1085
- v2,
1086
- v3,
1087
- v4,
1088
- v5,
1089
- ],
1090
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
1091
- device=device,
1092
- )
1093
-
1094
- assert_np_equal(v20.numpy()[0], 2 * s.numpy()[0] * v2.numpy()[0, 0], tol=tol)
1095
- assert_np_equal(v21.numpy()[0], 2 * s.numpy()[0] * v2.numpy()[0, 1], tol=tol)
1096
-
1097
- assert_np_equal(v30.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 0], tol=10 * tol)
1098
- assert_np_equal(v31.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 1], tol=10 * tol)
1099
- assert_np_equal(v32.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 2], tol=10 * tol)
1100
-
1101
- assert_np_equal(v40.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 0], tol=10 * tol)
1102
- assert_np_equal(v41.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 1], tol=10 * tol)
1103
- assert_np_equal(v42.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 2], tol=10 * tol)
1104
- assert_np_equal(v43.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 3], tol=10 * tol)
1105
-
1106
- assert_np_equal(v50.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 0], tol=10 * tol)
1107
- assert_np_equal(v51.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 1], tol=10 * tol)
1108
- assert_np_equal(v52.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 2], tol=10 * tol)
1109
- assert_np_equal(v53.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 3], tol=10 * tol)
1110
- assert_np_equal(v54.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 4], tol=10 * tol)
1111
-
1112
- incmps = np.concatenate([v.numpy()[0] for v in [v2, v3, v4, v5]])
1113
-
1114
- if dtype in np_float_types:
1115
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43]):
1116
- tape.backward(loss=l)
1117
- sgrad = tape.gradients[s].numpy()[0]
1118
- assert_np_equal(sgrad, 2 * incmps[i], tol=10 * tol)
1119
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4]])
1120
- expected_grads = np.zeros_like(allgrads)
1121
- expected_grads[i] = s.numpy()[0] * 2
1122
- assert_np_equal(allgrads, expected_grads, tol=10 * tol)
1123
- tape.zero()
1124
-
1125
-
1126
- def test_cw_multiplication(test, device, dtype, register_kernels=False):
1127
- np.random.seed(123)
1128
-
1129
- tol = {
1130
- np.float16: 5.0e-3,
1131
- np.float32: 1.0e-6,
1132
- np.float64: 1.0e-8,
1133
- }.get(dtype, 0)
1134
-
1135
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1136
- vec2 = wp.types.vector(length=2, dtype=wptype)
1137
- vec3 = wp.types.vector(length=3, dtype=wptype)
1138
- vec4 = wp.types.vector(length=4, dtype=wptype)
1139
- vec5 = wp.types.vector(length=5, dtype=wptype)
1140
-
1141
- def check_cw_mul(
1142
- s2: wp.array(dtype=vec2),
1143
- s3: wp.array(dtype=vec3),
1144
- s4: wp.array(dtype=vec4),
1145
- s5: wp.array(dtype=vec5),
1146
- v2: wp.array(dtype=vec2),
1147
- v3: wp.array(dtype=vec3),
1148
- v4: wp.array(dtype=vec4),
1149
- v5: wp.array(dtype=vec5),
1150
- v20: wp.array(dtype=wptype),
1151
- v21: wp.array(dtype=wptype),
1152
- v30: wp.array(dtype=wptype),
1153
- v31: wp.array(dtype=wptype),
1154
- v32: wp.array(dtype=wptype),
1155
- v40: wp.array(dtype=wptype),
1156
- v41: wp.array(dtype=wptype),
1157
- v42: wp.array(dtype=wptype),
1158
- v43: wp.array(dtype=wptype),
1159
- v50: wp.array(dtype=wptype),
1160
- v51: wp.array(dtype=wptype),
1161
- v52: wp.array(dtype=wptype),
1162
- v53: wp.array(dtype=wptype),
1163
- v54: wp.array(dtype=wptype),
1164
- ):
1165
- v2result = wp.cw_mul(s2[0], v2[0])
1166
- v3result = wp.cw_mul(s3[0], v3[0])
1167
- v4result = wp.cw_mul(s4[0], v4[0])
1168
- v5result = wp.cw_mul(s5[0], v5[0])
1169
-
1170
- v20[0] = wptype(2) * v2result[0]
1171
- v21[0] = wptype(2) * v2result[1]
1172
-
1173
- v30[0] = wptype(2) * v3result[0]
1174
- v31[0] = wptype(2) * v3result[1]
1175
- v32[0] = wptype(2) * v3result[2]
1176
-
1177
- v40[0] = wptype(2) * v4result[0]
1178
- v41[0] = wptype(2) * v4result[1]
1179
- v42[0] = wptype(2) * v4result[2]
1180
- v43[0] = wptype(2) * v4result[3]
1181
-
1182
- v50[0] = wptype(2) * v5result[0]
1183
- v51[0] = wptype(2) * v5result[1]
1184
- v52[0] = wptype(2) * v5result[2]
1185
- v53[0] = wptype(2) * v5result[3]
1186
- v54[0] = wptype(2) * v5result[4]
1187
-
1188
- kernel = getkernel(check_cw_mul, suffix=dtype.__name__)
1189
-
1190
- if register_kernels:
1191
- return
1192
-
1193
- s2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1194
- s3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1195
- s4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1196
- s5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1197
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1198
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1199
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1200
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1201
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1202
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1203
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1204
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1205
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1206
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1207
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1208
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1209
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1210
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1211
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1212
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1213
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1214
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1215
- tape = wp.Tape()
1216
- with tape:
1217
- wp.launch(
1218
- kernel,
1219
- dim=1,
1220
- inputs=[
1221
- s2,
1222
- s3,
1223
- s4,
1224
- s5,
1225
- v2,
1226
- v3,
1227
- v4,
1228
- v5,
1229
- ],
1230
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
1231
- device=device,
1232
- )
1233
-
1234
- assert_np_equal(v20.numpy()[0], 2 * s2.numpy()[0, 0] * v2.numpy()[0, 0], tol=10 * tol)
1235
- assert_np_equal(v21.numpy()[0], 2 * s2.numpy()[0, 1] * v2.numpy()[0, 1], tol=10 * tol)
1236
-
1237
- assert_np_equal(v30.numpy()[0], 2 * s3.numpy()[0, 0] * v3.numpy()[0, 0], tol=10 * tol)
1238
- assert_np_equal(v31.numpy()[0], 2 * s3.numpy()[0, 1] * v3.numpy()[0, 1], tol=10 * tol)
1239
- assert_np_equal(v32.numpy()[0], 2 * s3.numpy()[0, 2] * v3.numpy()[0, 2], tol=10 * tol)
1240
-
1241
- assert_np_equal(v40.numpy()[0], 2 * s4.numpy()[0, 0] * v4.numpy()[0, 0], tol=10 * tol)
1242
- assert_np_equal(v41.numpy()[0], 2 * s4.numpy()[0, 1] * v4.numpy()[0, 1], tol=10 * tol)
1243
- assert_np_equal(v42.numpy()[0], 2 * s4.numpy()[0, 2] * v4.numpy()[0, 2], tol=10 * tol)
1244
- assert_np_equal(v43.numpy()[0], 2 * s4.numpy()[0, 3] * v4.numpy()[0, 3], tol=10 * tol)
1245
-
1246
- assert_np_equal(v50.numpy()[0], 2 * s5.numpy()[0, 0] * v5.numpy()[0, 0], tol=10 * tol)
1247
- assert_np_equal(v51.numpy()[0], 2 * s5.numpy()[0, 1] * v5.numpy()[0, 1], tol=10 * tol)
1248
- assert_np_equal(v52.numpy()[0], 2 * s5.numpy()[0, 2] * v5.numpy()[0, 2], tol=10 * tol)
1249
- assert_np_equal(v53.numpy()[0], 2 * s5.numpy()[0, 3] * v5.numpy()[0, 3], tol=10 * tol)
1250
- assert_np_equal(v54.numpy()[0], 2 * s5.numpy()[0, 4] * v5.numpy()[0, 4], tol=10 * tol)
1251
-
1252
- incmps = np.concatenate([v.numpy()[0] for v in [v2, v3, v4, v5]])
1253
- scmps = np.concatenate([v.numpy()[0] for v in [s2, s3, s4, s5]])
1254
-
1255
- if dtype in np_float_types:
1256
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
1257
- tape.backward(loss=l)
1258
- sgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [s2, s3, s4, s5]])
1259
- expected_grads = np.zeros_like(sgrads)
1260
- expected_grads[i] = incmps[i] * 2
1261
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
1262
-
1263
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
1264
- expected_grads = np.zeros_like(allgrads)
1265
- expected_grads[i] = scmps[i] * 2
1266
- assert_np_equal(allgrads, expected_grads, tol=10 * tol)
1267
-
1268
- tape.zero()
1269
-
1270
-
1271
- def test_scalar_division(test, device, dtype, register_kernels=False):
1272
- np.random.seed(123)
1273
-
1274
- tol = {
1275
- np.float16: 5.0e-3,
1276
- np.float32: 1.0e-6,
1277
- np.float64: 1.0e-8,
1278
- }.get(dtype, 0)
1279
-
1280
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1281
- vec2 = wp.types.vector(length=2, dtype=wptype)
1282
- vec3 = wp.types.vector(length=3, dtype=wptype)
1283
- vec4 = wp.types.vector(length=4, dtype=wptype)
1284
- vec5 = wp.types.vector(length=5, dtype=wptype)
1285
-
1286
- def check_div(
1287
- s: wp.array(dtype=wptype),
1288
- v2: wp.array(dtype=vec2),
1289
- v3: wp.array(dtype=vec3),
1290
- v4: wp.array(dtype=vec4),
1291
- v5: wp.array(dtype=vec5),
1292
- v20: wp.array(dtype=wptype),
1293
- v21: wp.array(dtype=wptype),
1294
- v30: wp.array(dtype=wptype),
1295
- v31: wp.array(dtype=wptype),
1296
- v32: wp.array(dtype=wptype),
1297
- v40: wp.array(dtype=wptype),
1298
- v41: wp.array(dtype=wptype),
1299
- v42: wp.array(dtype=wptype),
1300
- v43: wp.array(dtype=wptype),
1301
- v50: wp.array(dtype=wptype),
1302
- v51: wp.array(dtype=wptype),
1303
- v52: wp.array(dtype=wptype),
1304
- v53: wp.array(dtype=wptype),
1305
- v54: wp.array(dtype=wptype),
1306
- ):
1307
- v2result = v2[0] / s[0]
1308
- v3result = v3[0] / s[0]
1309
- v4result = v4[0] / s[0]
1310
- v5result = v5[0] / s[0]
1311
-
1312
- v20[0] = wptype(2) * v2result[0]
1313
- v21[0] = wptype(2) * v2result[1]
1314
-
1315
- v30[0] = wptype(2) * v3result[0]
1316
- v31[0] = wptype(2) * v3result[1]
1317
- v32[0] = wptype(2) * v3result[2]
1318
-
1319
- v40[0] = wptype(2) * v4result[0]
1320
- v41[0] = wptype(2) * v4result[1]
1321
- v42[0] = wptype(2) * v4result[2]
1322
- v43[0] = wptype(2) * v4result[3]
1323
-
1324
- v50[0] = wptype(2) * v5result[0]
1325
- v51[0] = wptype(2) * v5result[1]
1326
- v52[0] = wptype(2) * v5result[2]
1327
- v53[0] = wptype(2) * v5result[3]
1328
- v54[0] = wptype(2) * v5result[4]
1329
-
1330
- kernel = getkernel(check_div, suffix=dtype.__name__)
1331
-
1332
- if register_kernels:
1333
- return
1334
-
1335
- s = wp.array(randvals([1], dtype), requires_grad=True, device=device)
1336
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1337
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1338
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1339
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1340
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1341
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1342
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1343
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1344
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1345
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1346
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1347
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1348
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1349
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1350
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1351
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1352
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1353
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1354
- tape = wp.Tape()
1355
- with tape:
1356
135
  wp.launch(
1357
136
  kernel,
1358
137
  dim=1,
1359
- inputs=[
1360
- s,
1361
- v2,
1362
- v3,
1363
- v4,
1364
- v5,
1365
- ],
1366
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
138
+ inputs=[],
1367
139
  device=device,
1368
- )
1369
-
1370
- if dtype in np_int_types:
1371
- assert_np_equal(v20.numpy()[0], 2 * (v2.numpy()[0, 0] // (s.numpy()[0])), tol=tol)
1372
- assert_np_equal(v21.numpy()[0], 2 * (v2.numpy()[0, 1] // (s.numpy()[0])), tol=tol)
1373
-
1374
- assert_np_equal(v30.numpy()[0], 2 * (v3.numpy()[0, 0] // (s.numpy()[0])), tol=10 * tol)
1375
- assert_np_equal(v31.numpy()[0], 2 * (v3.numpy()[0, 1] // (s.numpy()[0])), tol=10 * tol)
1376
- assert_np_equal(v32.numpy()[0], 2 * (v3.numpy()[0, 2] // (s.numpy()[0])), tol=10 * tol)
1377
-
1378
- assert_np_equal(v40.numpy()[0], 2 * (v4.numpy()[0, 0] // (s.numpy()[0])), tol=10 * tol)
1379
- assert_np_equal(v41.numpy()[0], 2 * (v4.numpy()[0, 1] // (s.numpy()[0])), tol=10 * tol)
1380
- assert_np_equal(v42.numpy()[0], 2 * (v4.numpy()[0, 2] // (s.numpy()[0])), tol=10 * tol)
1381
- assert_np_equal(v43.numpy()[0], 2 * (v4.numpy()[0, 3] // (s.numpy()[0])), tol=10 * tol)
1382
-
1383
- assert_np_equal(v50.numpy()[0], 2 * (v5.numpy()[0, 0] // (s.numpy()[0])), tol=10 * tol)
1384
- assert_np_equal(v51.numpy()[0], 2 * (v5.numpy()[0, 1] // (s.numpy()[0])), tol=10 * tol)
1385
- assert_np_equal(v52.numpy()[0], 2 * (v5.numpy()[0, 2] // (s.numpy()[0])), tol=10 * tol)
1386
- assert_np_equal(v53.numpy()[0], 2 * (v5.numpy()[0, 3] // (s.numpy()[0])), tol=10 * tol)
1387
- assert_np_equal(v54.numpy()[0], 2 * (v5.numpy()[0, 4] // (s.numpy()[0])), tol=10 * tol)
1388
-
1389
- else:
1390
- assert_np_equal(v20.numpy()[0], 2 * v2.numpy()[0, 0] / (s.numpy()[0]), tol=tol)
1391
- assert_np_equal(v21.numpy()[0], 2 * v2.numpy()[0, 1] / (s.numpy()[0]), tol=tol)
1392
-
1393
- assert_np_equal(v30.numpy()[0], 2 * v3.numpy()[0, 0] / (s.numpy()[0]), tol=10 * tol)
1394
- assert_np_equal(v31.numpy()[0], 2 * v3.numpy()[0, 1] / (s.numpy()[0]), tol=10 * tol)
1395
- assert_np_equal(v32.numpy()[0], 2 * v3.numpy()[0, 2] / (s.numpy()[0]), tol=10 * tol)
1396
-
1397
- assert_np_equal(v40.numpy()[0], 2 * v4.numpy()[0, 0] / (s.numpy()[0]), tol=10 * tol)
1398
- assert_np_equal(v41.numpy()[0], 2 * v4.numpy()[0, 1] / (s.numpy()[0]), tol=10 * tol)
1399
- assert_np_equal(v42.numpy()[0], 2 * v4.numpy()[0, 2] / (s.numpy()[0]), tol=10 * tol)
1400
- assert_np_equal(v43.numpy()[0], 2 * v4.numpy()[0, 3] / (s.numpy()[0]), tol=10 * tol)
1401
-
1402
- assert_np_equal(v50.numpy()[0], 2 * v5.numpy()[0, 0] / (s.numpy()[0]), tol=10 * tol)
1403
- assert_np_equal(v51.numpy()[0], 2 * v5.numpy()[0, 1] / (s.numpy()[0]), tol=10 * tol)
1404
- assert_np_equal(v52.numpy()[0], 2 * v5.numpy()[0, 2] / (s.numpy()[0]), tol=10 * tol)
1405
- assert_np_equal(v53.numpy()[0], 2 * v5.numpy()[0, 3] / (s.numpy()[0]), tol=10 * tol)
1406
- assert_np_equal(v54.numpy()[0], 2 * v5.numpy()[0, 4] / (s.numpy()[0]), tol=10 * tol)
1407
-
1408
- incmps = np.concatenate([v.numpy()[0] for v in [v2, v3, v4, v5]])
1409
-
1410
- if dtype in np_float_types:
1411
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
1412
- tape.backward(loss=l)
1413
- sgrad = tape.gradients[s].numpy()[0]
1414
-
1415
- # d/ds v/s = -v/s^2
1416
- assert_np_equal(sgrad, -2 * incmps[i] / (s.numpy()[0] * s.numpy()[0]), tol=10 * tol)
1417
-
1418
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
1419
- expected_grads = np.zeros_like(allgrads)
1420
- expected_grads[i] = 2 / s.numpy()[0]
1421
-
1422
- # d/dv v/s = 1/s
1423
- assert_np_equal(allgrads, expected_grads, tol=tol)
1424
- tape.zero()
1425
-
1426
-
1427
- def test_cw_division(test, device, dtype, register_kernels=False):
1428
- np.random.seed(123)
1429
-
1430
- tol = {
1431
- np.float16: 1.0e-2,
1432
- np.float32: 1.0e-6,
1433
- np.float64: 1.0e-8,
1434
- }.get(dtype, 0)
1435
-
1436
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1437
- vec2 = wp.types.vector(length=2, dtype=wptype)
1438
- vec3 = wp.types.vector(length=3, dtype=wptype)
1439
- vec4 = wp.types.vector(length=4, dtype=wptype)
1440
- vec5 = wp.types.vector(length=5, dtype=wptype)
1441
-
1442
- def check_cw_div(
1443
- s2: wp.array(dtype=vec2),
1444
- s3: wp.array(dtype=vec3),
1445
- s4: wp.array(dtype=vec4),
1446
- s5: wp.array(dtype=vec5),
1447
- v2: wp.array(dtype=vec2),
1448
- v3: wp.array(dtype=vec3),
1449
- v4: wp.array(dtype=vec4),
1450
- v5: wp.array(dtype=vec5),
1451
- v20: wp.array(dtype=wptype),
1452
- v21: wp.array(dtype=wptype),
1453
- v30: wp.array(dtype=wptype),
1454
- v31: wp.array(dtype=wptype),
1455
- v32: wp.array(dtype=wptype),
1456
- v40: wp.array(dtype=wptype),
1457
- v41: wp.array(dtype=wptype),
1458
- v42: wp.array(dtype=wptype),
1459
- v43: wp.array(dtype=wptype),
1460
- v50: wp.array(dtype=wptype),
1461
- v51: wp.array(dtype=wptype),
1462
- v52: wp.array(dtype=wptype),
1463
- v53: wp.array(dtype=wptype),
1464
- v54: wp.array(dtype=wptype),
1465
- ):
1466
- v2result = wp.cw_div(v2[0], s2[0])
1467
- v3result = wp.cw_div(v3[0], s3[0])
1468
- v4result = wp.cw_div(v4[0], s4[0])
1469
- v5result = wp.cw_div(v5[0], s5[0])
1470
-
1471
- v20[0] = wptype(2) * v2result[0]
1472
- v21[0] = wptype(2) * v2result[1]
1473
-
1474
- v30[0] = wptype(2) * v3result[0]
1475
- v31[0] = wptype(2) * v3result[1]
1476
- v32[0] = wptype(2) * v3result[2]
140
+ )
1477
141
 
1478
- v40[0] = wptype(2) * v4result[0]
1479
- v41[0] = wptype(2) * v4result[1]
1480
- v42[0] = wptype(2) * v4result[2]
1481
- v43[0] = wptype(2) * v4result[3]
1482
142
 
1483
- v50[0] = wptype(2) * v5result[0]
1484
- v51[0] = wptype(2) * v5result[1]
1485
- v52[0] = wptype(2) * v5result[2]
1486
- v53[0] = wptype(2) * v5result[3]
1487
- v54[0] = wptype(2) * v5result[4]
143
+ def test_anon_constructor_error_numeric_args_mismatch(test, device):
144
+ @wp.kernel
145
+ def kernel():
146
+ wp.vector(1.0, 2)
1488
147
 
1489
- kernel = getkernel(check_cw_div, suffix=dtype.__name__)
148
+ with test.assertRaisesRegex(
149
+ RuntimeError,
150
+ r"All numeric arguments to vec\(\) constructor should have the same "
151
+ r"type, expected 2 arg_types of type <class 'warp.types.float32'>, "
152
+ r"received <class 'warp.types.float32'>,<class 'warp.types.int32'>$",
153
+ ):
154
+ wp.launch(
155
+ kernel,
156
+ dim=1,
157
+ inputs=[],
158
+ device=device,
159
+ )
1490
160
 
1491
- if register_kernels:
1492
- return
1493
161
 
1494
- s2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1495
- s3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1496
- s4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1497
- s5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1498
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1499
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1500
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1501
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1502
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1503
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1504
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1505
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1506
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1507
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1508
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1509
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1510
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1511
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1512
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1513
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1514
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1515
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1516
- tape = wp.Tape()
1517
- with tape:
162
+ def test_tpl_constructor_error_incompatible_sizes(test, device):
163
+ @wp.kernel
164
+ def kernel():
165
+ wp.vec3(wp.vec2(1.0, 2.0))
166
+
167
+ with test.assertRaisesRegex(RuntimeError, r"Incompatible matrix sizes for casting copy constructor, 3 vs 2"):
1518
168
  wp.launch(
1519
169
  kernel,
1520
170
  dim=1,
1521
- inputs=[
1522
- s2,
1523
- s3,
1524
- s4,
1525
- s5,
1526
- v2,
1527
- v3,
1528
- v4,
1529
- v5,
1530
- ],
1531
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
171
+ inputs=[],
1532
172
  device=device,
1533
173
  )
1534
174
 
1535
- if dtype in np_int_types:
1536
- assert_np_equal(v20.numpy()[0], 2 * (v2.numpy()[0, 0] // s2.numpy()[0, 0]), tol=tol)
1537
- assert_np_equal(v21.numpy()[0], 2 * (v2.numpy()[0, 1] // s2.numpy()[0, 1]), tol=tol)
1538
-
1539
- assert_np_equal(v30.numpy()[0], 2 * (v3.numpy()[0, 0] // s3.numpy()[0, 0]), tol=tol)
1540
- assert_np_equal(v31.numpy()[0], 2 * (v3.numpy()[0, 1] // s3.numpy()[0, 1]), tol=tol)
1541
- assert_np_equal(v32.numpy()[0], 2 * (v3.numpy()[0, 2] // s3.numpy()[0, 2]), tol=tol)
1542
-
1543
- assert_np_equal(v40.numpy()[0], 2 * (v4.numpy()[0, 0] // s4.numpy()[0, 0]), tol=tol)
1544
- assert_np_equal(v41.numpy()[0], 2 * (v4.numpy()[0, 1] // s4.numpy()[0, 1]), tol=tol)
1545
- assert_np_equal(v42.numpy()[0], 2 * (v4.numpy()[0, 2] // s4.numpy()[0, 2]), tol=tol)
1546
- assert_np_equal(v43.numpy()[0], 2 * (v4.numpy()[0, 3] // s4.numpy()[0, 3]), tol=tol)
1547
-
1548
- assert_np_equal(v50.numpy()[0], 2 * (v5.numpy()[0, 0] // s5.numpy()[0, 0]), tol=tol)
1549
- assert_np_equal(v51.numpy()[0], 2 * (v5.numpy()[0, 1] // s5.numpy()[0, 1]), tol=tol)
1550
- assert_np_equal(v52.numpy()[0], 2 * (v5.numpy()[0, 2] // s5.numpy()[0, 2]), tol=tol)
1551
- assert_np_equal(v53.numpy()[0], 2 * (v5.numpy()[0, 3] // s5.numpy()[0, 3]), tol=tol)
1552
- assert_np_equal(v54.numpy()[0], 2 * (v5.numpy()[0, 4] // s5.numpy()[0, 4]), tol=tol)
1553
- else:
1554
- assert_np_equal(v20.numpy()[0], 2 * v2.numpy()[0, 0] / s2.numpy()[0, 0], tol=tol)
1555
- assert_np_equal(v21.numpy()[0], 2 * v2.numpy()[0, 1] / s2.numpy()[0, 1], tol=tol)
1556
-
1557
- assert_np_equal(v30.numpy()[0], 2 * v3.numpy()[0, 0] / s3.numpy()[0, 0], tol=tol)
1558
- assert_np_equal(v31.numpy()[0], 2 * v3.numpy()[0, 1] / s3.numpy()[0, 1], tol=tol)
1559
- assert_np_equal(v32.numpy()[0], 2 * v3.numpy()[0, 2] / s3.numpy()[0, 2], tol=tol)
1560
-
1561
- assert_np_equal(v40.numpy()[0], 2 * v4.numpy()[0, 0] / s4.numpy()[0, 0], tol=tol)
1562
- assert_np_equal(v41.numpy()[0], 2 * v4.numpy()[0, 1] / s4.numpy()[0, 1], tol=tol)
1563
- assert_np_equal(v42.numpy()[0], 2 * v4.numpy()[0, 2] / s4.numpy()[0, 2], tol=tol)
1564
- assert_np_equal(v43.numpy()[0], 2 * v4.numpy()[0, 3] / s4.numpy()[0, 3], tol=tol)
1565
-
1566
- assert_np_equal(v50.numpy()[0], 2 * v5.numpy()[0, 0] / s5.numpy()[0, 0], tol=tol)
1567
- assert_np_equal(v51.numpy()[0], 2 * v5.numpy()[0, 1] / s5.numpy()[0, 1], tol=tol)
1568
- assert_np_equal(v52.numpy()[0], 2 * v5.numpy()[0, 2] / s5.numpy()[0, 2], tol=tol)
1569
- assert_np_equal(v53.numpy()[0], 2 * v5.numpy()[0, 3] / s5.numpy()[0, 3], tol=tol)
1570
- assert_np_equal(v54.numpy()[0], 2 * v5.numpy()[0, 4] / s5.numpy()[0, 4], tol=tol)
1571
175
 
1572
- if dtype in np_float_types:
1573
- incmps = np.concatenate([v.numpy()[0] for v in [v2, v3, v4, v5]])
1574
- scmps = np.concatenate([v.numpy()[0] for v in [s2, s3, s4, s5]])
176
+ def test_tpl_constructor_error_numeric_args_mismatch(test, device):
177
+ @wp.kernel
178
+ def kernel():
179
+ wp.vec2(1.0, 2)
1575
180
 
1576
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
1577
- tape.backward(loss=l)
1578
- sgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [s2, s3, s4, s5]])
1579
- expected_grads = np.zeros_like(sgrads)
181
+ with test.assertRaisesRegex(
182
+ RuntimeError,
183
+ r"All numeric arguments to vec\(\) constructor should have the same "
184
+ r"type, expected 2 arg_types of type <class 'warp.types.float32'>, "
185
+ r"received <class 'warp.types.float32'>,<class 'warp.types.int32'>$",
186
+ ):
187
+ wp.launch(
188
+ kernel,
189
+ dim=1,
190
+ inputs=[],
191
+ device=device,
192
+ )
1580
193
 
1581
- # d/ds v/s = -v/s^2
1582
- expected_grads[i] = -incmps[i] * 2 / (scmps[i] * scmps[i])
1583
- assert_np_equal(sgrads, expected_grads, tol=20 * tol)
1584
194
 
1585
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
1586
- expected_grads = np.zeros_like(allgrads)
195
+ def test_tpl_ops_with_anon(test, device):
196
+ vec3i = wp.vec(3, dtype=int)
1587
197
 
1588
- # d/dv v/s = 1/s
1589
- expected_grads[i] = 2 / scmps[i]
1590
- assert_np_equal(allgrads, expected_grads, tol=tol)
198
+ v = wp.vec3i(1, 2, 3)
199
+ v += vec3i(2, 3, 4)
200
+ v -= vec3i(3, 4, 5)
201
+ test.assertSequenceEqual(v, (0, 1, 2))
1591
202
 
1592
- tape.zero()
203
+ v = vec3i(1, 2, 3)
204
+ v += wp.vec3i(2, 3, 4)
205
+ v -= wp.vec3i(3, 4, 5)
206
+ test.assertSequenceEqual(v, (0, 1, 2))
1593
207
 
1594
208
 
1595
- def test_addition(test, device, dtype, register_kernels=False):
1596
- np.random.seed(123)
209
+ def test_negation(test, device, dtype, register_kernels=False):
210
+ rng = np.random.default_rng(123)
1597
211
 
1598
212
  tol = {
1599
213
  np.float16: 5.0e-3,
@@ -1607,15 +221,15 @@ def test_addition(test, device, dtype, register_kernels=False):
1607
221
  vec4 = wp.types.vector(length=4, dtype=wptype)
1608
222
  vec5 = wp.types.vector(length=5, dtype=wptype)
1609
223
 
1610
- def check_add(
1611
- s2: wp.array(dtype=vec2),
1612
- s3: wp.array(dtype=vec3),
1613
- s4: wp.array(dtype=vec4),
1614
- s5: wp.array(dtype=vec5),
224
+ def check_negation(
1615
225
  v2: wp.array(dtype=vec2),
1616
226
  v3: wp.array(dtype=vec3),
1617
227
  v4: wp.array(dtype=vec4),
1618
228
  v5: wp.array(dtype=vec5),
229
+ v2out: wp.array(dtype=vec2),
230
+ v3out: wp.array(dtype=vec3),
231
+ v4out: wp.array(dtype=vec4),
232
+ v5out: wp.array(dtype=vec5),
1619
233
  v20: wp.array(dtype=wptype),
1620
234
  v21: wp.array(dtype=wptype),
1621
235
  v30: wp.array(dtype=wptype),
@@ -1631,11 +245,17 @@ def test_addition(test, device, dtype, register_kernels=False):
1631
245
  v53: wp.array(dtype=wptype),
1632
246
  v54: wp.array(dtype=wptype),
1633
247
  ):
1634
- v2result = v2[0] + s2[0]
1635
- v3result = v3[0] + s3[0]
1636
- v4result = v4[0] + s4[0]
1637
- v5result = v5[0] + s5[0]
248
+ v2result = -v2[0]
249
+ v3result = -v3[0]
250
+ v4result = -v4[0]
251
+ v5result = -v5[0]
252
+
253
+ v2out[0] = v2result
254
+ v3out[0] = v3result
255
+ v4out[0] = v4result
256
+ v5out[0] = v5result
1638
257
 
258
+ # multiply these outputs by 2 so we've got something to backpropagate:
1639
259
  v20[0] = wptype(2) * v2result[0]
1640
260
  v21[0] = wptype(2) * v2result[1]
1641
261
 
@@ -1654,19 +274,21 @@ def test_addition(test, device, dtype, register_kernels=False):
1654
274
  v53[0] = wptype(2) * v5result[3]
1655
275
  v54[0] = wptype(2) * v5result[4]
1656
276
 
1657
- kernel = getkernel(check_add, suffix=dtype.__name__)
277
+ kernel = getkernel(check_negation, suffix=dtype.__name__)
1658
278
 
1659
279
  if register_kernels:
1660
280
  return
1661
281
 
1662
- s2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1663
- s3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1664
- s4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1665
- s5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1666
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1667
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1668
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1669
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
282
+ v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
283
+ v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
284
+ v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
285
+ v5_np = randvals(rng, (1, 5), dtype)
286
+ v5 = wp.array(v5_np, dtype=vec5, requires_grad=True, device=device)
287
+
288
+ v2out = wp.zeros(1, dtype=vec2, device=device)
289
+ v3out = wp.zeros(1, dtype=vec3, device=device)
290
+ v4out = wp.zeros(1, dtype=vec4, device=device)
291
+ v5out = wp.zeros(1, dtype=vec5, device=device)
1670
292
  v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1671
293
  v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1672
294
  v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
@@ -1681,67 +303,33 @@ def test_addition(test, device, dtype, register_kernels=False):
1681
303
  v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1682
304
  v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1683
305
  v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
306
+
1684
307
  tape = wp.Tape()
1685
308
  with tape:
1686
309
  wp.launch(
1687
310
  kernel,
1688
311
  dim=1,
1689
- inputs=[
1690
- s2,
1691
- s3,
1692
- s4,
1693
- s5,
1694
- v2,
1695
- v3,
1696
- v4,
1697
- v5,
1698
- ],
1699
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
312
+ inputs=[v2, v3, v4, v5],
313
+ outputs=[v2out, v3out, v4out, v5out, v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
1700
314
  device=device,
1701
315
  )
1702
316
 
1703
- assert_np_equal(v20.numpy()[0], 2 * (v2.numpy()[0, 0] + s2.numpy()[0, 0]), tol=tol)
1704
- assert_np_equal(v21.numpy()[0], 2 * (v2.numpy()[0, 1] + s2.numpy()[0, 1]), tol=tol)
1705
-
1706
- assert_np_equal(v30.numpy()[0], 2 * (v3.numpy()[0, 0] + s3.numpy()[0, 0]), tol=tol)
1707
- assert_np_equal(v31.numpy()[0], 2 * (v3.numpy()[0, 1] + s3.numpy()[0, 1]), tol=tol)
1708
- assert_np_equal(v32.numpy()[0], 2 * (v3.numpy()[0, 2] + s3.numpy()[0, 2]), tol=tol)
1709
-
1710
- assert_np_equal(v40.numpy()[0], 2 * (v4.numpy()[0, 0] + s4.numpy()[0, 0]), tol=tol)
1711
- assert_np_equal(v41.numpy()[0], 2 * (v4.numpy()[0, 1] + s4.numpy()[0, 1]), tol=tol)
1712
- assert_np_equal(v42.numpy()[0], 2 * (v4.numpy()[0, 2] + s4.numpy()[0, 2]), tol=tol)
1713
- assert_np_equal(v43.numpy()[0], 2 * (v4.numpy()[0, 3] + s4.numpy()[0, 3]), tol=tol)
1714
-
1715
- assert_np_equal(v50.numpy()[0], 2 * (v5.numpy()[0, 0] + s5.numpy()[0, 0]), tol=tol)
1716
- assert_np_equal(v51.numpy()[0], 2 * (v5.numpy()[0, 1] + s5.numpy()[0, 1]), tol=tol)
1717
- assert_np_equal(v52.numpy()[0], 2 * (v5.numpy()[0, 2] + s5.numpy()[0, 2]), tol=tol)
1718
- assert_np_equal(v53.numpy()[0], 2 * (v5.numpy()[0, 3] + s5.numpy()[0, 3]), tol=tol)
1719
- assert_np_equal(v54.numpy()[0], 2 * (v5.numpy()[0, 4] + s5.numpy()[0, 4]), tol=2 * tol)
1720
-
1721
317
  if dtype in np_float_types:
1722
318
  for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
1723
319
  tape.backward(loss=l)
1724
- sgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [s2, s3, s4, s5]])
1725
- expected_grads = np.zeros_like(sgrads)
1726
-
1727
- expected_grads[i] = 2
1728
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
1729
-
1730
320
  allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
321
+ expected_grads = np.zeros_like(allgrads)
322
+ expected_grads[i] = -2
1731
323
  assert_np_equal(allgrads, expected_grads, tol=tol)
1732
-
1733
324
  tape.zero()
1734
325
 
326
+ assert_np_equal(v2out.numpy()[0], -v2.numpy()[0], tol=tol)
327
+ assert_np_equal(v3out.numpy()[0], -v3.numpy()[0], tol=tol)
328
+ assert_np_equal(v4out.numpy()[0], -v4.numpy()[0], tol=tol)
329
+ assert_np_equal(v5out.numpy()[0], -v5.numpy()[0], tol=tol)
1735
330
 
1736
- def test_subtraction_unsigned(test, device, dtype, register_kernels=False):
1737
- np.random.seed(123)
1738
-
1739
- tol = {
1740
- np.float16: 1.0e-3,
1741
- np.float32: 1.0e-6,
1742
- np.float64: 1.0e-8,
1743
- }.get(dtype, 0)
1744
331
 
332
+ def test_subtraction_unsigned(test, device, dtype, register_kernels=False):
1745
333
  wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1746
334
  vec2 = wp.types.vector(length=2, dtype=wptype)
1747
335
  vec3 = wp.types.vector(length=3, dtype=wptype)
@@ -1790,7 +378,7 @@ def test_subtraction_unsigned(test, device, dtype, register_kernels=False):
1790
378
 
1791
379
 
1792
380
  def test_subtraction(test, device, dtype, register_kernels=False):
1793
- np.random.seed(123)
381
+ rng = np.random.default_rng(123)
1794
382
 
1795
383
  tol = {
1796
384
  np.float16: 5.0e-3,
@@ -1857,14 +445,14 @@ def test_subtraction(test, device, dtype, register_kernels=False):
1857
445
  if register_kernels:
1858
446
  return
1859
447
 
1860
- s2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1861
- s3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1862
- s4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1863
- s5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1864
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1865
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1866
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1867
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
448
+ s2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
449
+ s3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
450
+ s4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
451
+ s5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
452
+ v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
453
+ v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
454
+ v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
455
+ v5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1868
456
  v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1869
457
  v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1870
458
  v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
@@ -1935,129 +523,8 @@ def test_subtraction(test, device, dtype, register_kernels=False):
1935
523
  tape.zero()
1936
524
 
1937
525
 
1938
- def test_dotproduct(test, device, dtype, register_kernels=False):
1939
- np.random.seed(123)
1940
-
1941
- tol = {
1942
- np.float16: 1.0e-2,
1943
- np.float32: 1.0e-6,
1944
- np.float64: 1.0e-8,
1945
- }.get(dtype, 0)
1946
-
1947
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1948
- vec2 = wp.types.vector(length=2, dtype=wptype)
1949
- vec3 = wp.types.vector(length=3, dtype=wptype)
1950
- vec4 = wp.types.vector(length=4, dtype=wptype)
1951
- vec5 = wp.types.vector(length=5, dtype=wptype)
1952
-
1953
- def check_dot(
1954
- s2: wp.array(dtype=vec2),
1955
- s3: wp.array(dtype=vec3),
1956
- s4: wp.array(dtype=vec4),
1957
- s5: wp.array(dtype=vec5),
1958
- v2: wp.array(dtype=vec2),
1959
- v3: wp.array(dtype=vec3),
1960
- v4: wp.array(dtype=vec4),
1961
- v5: wp.array(dtype=vec5),
1962
- dot2: wp.array(dtype=wptype),
1963
- dot3: wp.array(dtype=wptype),
1964
- dot4: wp.array(dtype=wptype),
1965
- dot5: wp.array(dtype=wptype),
1966
- ):
1967
- dot2[0] = wptype(2) * wp.dot(v2[0], s2[0])
1968
- dot3[0] = wptype(2) * wp.dot(v3[0], s3[0])
1969
- dot4[0] = wptype(2) * wp.dot(v4[0], s4[0])
1970
- dot5[0] = wptype(2) * wp.dot(v5[0], s5[0])
1971
-
1972
- kernel = getkernel(check_dot, suffix=dtype.__name__)
1973
-
1974
- if register_kernels:
1975
- return
1976
-
1977
- s2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1978
- s3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1979
- s4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1980
- s5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1981
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1982
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1983
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1984
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1985
- dot2 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1986
- dot3 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1987
- dot4 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1988
- dot5 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1989
- tape = wp.Tape()
1990
- with tape:
1991
- wp.launch(
1992
- kernel,
1993
- dim=1,
1994
- inputs=[
1995
- s2,
1996
- s3,
1997
- s4,
1998
- s5,
1999
- v2,
2000
- v3,
2001
- v4,
2002
- v5,
2003
- ],
2004
- outputs=[dot2, dot3, dot4, dot5],
2005
- device=device,
2006
- )
2007
-
2008
- assert_np_equal(dot2.numpy()[0], 2.0 * (v2.numpy() * s2.numpy()).sum(), tol=10 * tol)
2009
- assert_np_equal(dot3.numpy()[0], 2.0 * (v3.numpy() * s3.numpy()).sum(), tol=10 * tol)
2010
- assert_np_equal(dot4.numpy()[0], 2.0 * (v4.numpy() * s4.numpy()).sum(), tol=10 * tol)
2011
- assert_np_equal(dot5.numpy()[0], 2.0 * (v5.numpy() * s5.numpy()).sum(), tol=10 * tol)
2012
-
2013
- if dtype in np_float_types:
2014
- tape.backward(loss=dot2)
2015
- sgrads = tape.gradients[s2].numpy()[0]
2016
- expected_grads = 2.0 * v2.numpy()[0]
2017
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
2018
-
2019
- vgrads = tape.gradients[v2].numpy()[0]
2020
- expected_grads = 2.0 * s2.numpy()[0]
2021
- assert_np_equal(vgrads, expected_grads, tol=tol)
2022
-
2023
- tape.zero()
2024
-
2025
- tape.backward(loss=dot3)
2026
- sgrads = tape.gradients[s3].numpy()[0]
2027
- expected_grads = 2.0 * v3.numpy()[0]
2028
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
2029
-
2030
- vgrads = tape.gradients[v3].numpy()[0]
2031
- expected_grads = 2.0 * s3.numpy()[0]
2032
- assert_np_equal(vgrads, expected_grads, tol=tol)
2033
-
2034
- tape.zero()
2035
-
2036
- tape.backward(loss=dot4)
2037
- sgrads = tape.gradients[s4].numpy()[0]
2038
- expected_grads = 2.0 * v4.numpy()[0]
2039
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
2040
-
2041
- vgrads = tape.gradients[v4].numpy()[0]
2042
- expected_grads = 2.0 * s4.numpy()[0]
2043
- assert_np_equal(vgrads, expected_grads, tol=tol)
2044
-
2045
- tape.zero()
2046
-
2047
- tape.backward(loss=dot5)
2048
- sgrads = tape.gradients[s5].numpy()[0]
2049
- expected_grads = 2.0 * v5.numpy()[0]
2050
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
2051
-
2052
- vgrads = tape.gradients[v5].numpy()[0]
2053
- expected_grads = 2.0 * s5.numpy()[0]
2054
- assert_np_equal(vgrads, expected_grads, tol=10 * tol)
2055
-
2056
- tape.zero()
2057
-
2058
-
2059
526
  def test_length(test, device, dtype, register_kernels=False):
2060
- np.random.seed(123)
527
+ rng = np.random.default_rng(123)
2061
528
 
2062
529
  tol = {
2063
530
  np.float16: 5.0e-3,
@@ -2100,10 +567,10 @@ def test_length(test, device, dtype, register_kernels=False):
2100
567
  if register_kernels:
2101
568
  return
2102
569
 
2103
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
2104
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
2105
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
2106
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
570
+ v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
571
+ v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
572
+ v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
573
+ v5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
2107
574
 
2108
575
  l2 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2109
576
  l3 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
@@ -2190,7 +657,7 @@ def test_length(test, device, dtype, register_kernels=False):
2190
657
 
2191
658
 
2192
659
  def test_normalize(test, device, dtype, register_kernels=False):
2193
- np.random.seed(123)
660
+ rng = np.random.default_rng(123)
2194
661
 
2195
662
  tol = {
2196
663
  np.float16: 5.0e-3,
@@ -2298,10 +765,10 @@ def test_normalize(test, device, dtype, register_kernels=False):
2298
765
 
2299
766
  # I've already tested the things I'm using in check_normalize_alt, so I'll just
2300
767
  # make sure the two are giving the same results/gradients
2301
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
2302
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
2303
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
2304
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
768
+ v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
769
+ v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
770
+ v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
771
+ v5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
2305
772
 
2306
773
  n20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2307
774
  n21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
@@ -2423,7 +890,7 @@ def test_normalize(test, device, dtype, register_kernels=False):
2423
890
 
2424
891
 
2425
892
  def test_crossproduct(test, device, dtype, register_kernels=False):
2426
- np.random.seed(123)
893
+ rng = np.random.default_rng(123)
2427
894
 
2428
895
  tol = {
2429
896
  np.float16: 5.0e-3,
@@ -2453,8 +920,8 @@ def test_crossproduct(test, device, dtype, register_kernels=False):
2453
920
  if register_kernels:
2454
921
  return
2455
922
 
2456
- s3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
2457
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
923
+ s3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
924
+ v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
2458
925
  c0 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2459
926
  c1 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2460
927
  c2 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
@@ -2517,216 +984,115 @@ def test_crossproduct(test, device, dtype, register_kernels=False):
2517
984
  tape.zero()
2518
985
 
2519
986
 
2520
- def test_minmax(test, device, dtype, register_kernels=False):
2521
- np.random.seed(123)
987
+ def test_casting_constructors(test, device, dtype, register_kernels=False):
988
+ np_type = np.dtype(dtype)
989
+ wp_type = wp.types.np_dtype_to_warp_type[np_type]
990
+ vec3 = wp.types.vector(length=3, dtype=wp_type)
2522
991
 
2523
- # \TODO: not quite sure why, but the numbers are off for 16 bit float
2524
- # on the cpu (but not cuda). This is probably just the sketchy float16
2525
- # arithmetic I implemented to get all this stuff working, so
2526
- # hopefully that can be fixed when we do that correctly.
2527
- tol = {
2528
- np.float16: 1.0e-2,
2529
- }.get(dtype, 0)
992
+ np16 = np.dtype(np.float16)
993
+ wp16 = wp.types.np_dtype_to_warp_type[np16]
2530
994
 
2531
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
2532
- vec2 = wp.types.vector(length=2, dtype=wptype)
2533
- vec3 = wp.types.vector(length=3, dtype=wptype)
2534
- vec4 = wp.types.vector(length=4, dtype=wptype)
2535
- vec5 = wp.types.vector(length=5, dtype=wptype)
995
+ np32 = np.dtype(np.float32)
996
+ wp32 = wp.types.np_dtype_to_warp_type[np32]
2536
997
 
2537
- # \TODO: Also not quite sure why: this kernel compiles incredibly
2538
- # slowly though...
2539
- def check_vec_min_max(
2540
- a: wp.array(dtype=wptype, ndim=2),
2541
- b: wp.array(dtype=wptype, ndim=2),
2542
- mins: wp.array(dtype=wptype, ndim=2),
2543
- maxs: wp.array(dtype=wptype, ndim=2),
2544
- ):
2545
- for i in range(10):
2546
- # multiplying by 2 so we've got something to backpropagate:
2547
- a2read = vec2(a[i, 0], a[i, 1])
2548
- b2read = vec2(b[i, 0], b[i, 1])
2549
- c2 = wptype(2) * wp.min(a2read, b2read)
2550
- d2 = wptype(2) * wp.max(a2read, b2read)
2551
-
2552
- a3read = vec3(a[i, 2], a[i, 3], a[i, 4])
2553
- b3read = vec3(b[i, 2], b[i, 3], b[i, 4])
2554
- c3 = wptype(2) * wp.min(a3read, b3read)
2555
- d3 = wptype(2) * wp.max(a3read, b3read)
2556
-
2557
- a4read = vec4(a[i, 5], a[i, 6], a[i, 7], a[i, 8])
2558
- b4read = vec4(b[i, 5], b[i, 6], b[i, 7], b[i, 8])
2559
- c4 = wptype(2) * wp.min(a4read, b4read)
2560
- d4 = wptype(2) * wp.max(a4read, b4read)
2561
-
2562
- a5read = vec5(a[i, 9], a[i, 10], a[i, 11], a[i, 12], a[i, 13])
2563
- b5read = vec5(b[i, 9], b[i, 10], b[i, 11], b[i, 12], b[i, 13])
2564
- c5 = wptype(2) * wp.min(a5read, b5read)
2565
- d5 = wptype(2) * wp.max(a5read, b5read)
2566
-
2567
- mins[i, 0] = c2[0]
2568
- mins[i, 1] = c2[1]
2569
-
2570
- mins[i, 2] = c3[0]
2571
- mins[i, 3] = c3[1]
2572
- mins[i, 4] = c3[2]
2573
-
2574
- mins[i, 5] = c4[0]
2575
- mins[i, 6] = c4[1]
2576
- mins[i, 7] = c4[2]
2577
- mins[i, 8] = c4[3]
2578
-
2579
- mins[i, 9] = c5[0]
2580
- mins[i, 10] = c5[1]
2581
- mins[i, 11] = c5[2]
2582
- mins[i, 12] = c5[3]
2583
- mins[i, 13] = c5[4]
2584
-
2585
- maxs[i, 0] = d2[0]
2586
- maxs[i, 1] = d2[1]
2587
-
2588
- maxs[i, 2] = d3[0]
2589
- maxs[i, 3] = d3[1]
2590
- maxs[i, 4] = d3[2]
2591
-
2592
- maxs[i, 5] = d4[0]
2593
- maxs[i, 6] = d4[1]
2594
- maxs[i, 7] = d4[2]
2595
- maxs[i, 8] = d4[3]
2596
-
2597
- maxs[i, 9] = d5[0]
2598
- maxs[i, 10] = d5[1]
2599
- maxs[i, 11] = d5[2]
2600
- maxs[i, 12] = d5[3]
2601
- maxs[i, 13] = d5[4]
2602
-
2603
- kernel = getkernel(check_vec_min_max, suffix=dtype.__name__)
2604
- output_select_kernel = get_select_kernel2(wptype)
998
+ np64 = np.dtype(np.float64)
999
+ wp64 = wp.types.np_dtype_to_warp_type[np64]
2605
1000
 
2606
- if register_kernels:
2607
- return
1001
+ def cast_float16(a: wp.array(dtype=wp_type, ndim=2), b: wp.array(dtype=wp16, ndim=2)):
1002
+ tid = wp.tid()
2608
1003
 
2609
- a = wp.array(randvals((10, 14), dtype), dtype=wptype, requires_grad=True, device=device)
2610
- b = wp.array(randvals((10, 14), dtype), dtype=wptype, requires_grad=True, device=device)
1004
+ v1 = vec3(a[tid, 0], a[tid, 1], a[tid, 2])
1005
+ v2 = wp.vector(v1, dtype=wp16)
2611
1006
 
2612
- mins = wp.zeros((10, 14), dtype=wptype, requires_grad=True, device=device)
2613
- maxs = wp.zeros((10, 14), dtype=wptype, requires_grad=True, device=device)
1007
+ b[tid, 0] = v2[0]
1008
+ b[tid, 1] = v2[1]
1009
+ b[tid, 2] = v2[2]
2614
1010
 
2615
- tape = wp.Tape()
2616
- with tape:
2617
- wp.launch(kernel, dim=1, inputs=[a, b], outputs=[mins, maxs], device=device)
1011
+ def cast_float32(a: wp.array(dtype=wp_type, ndim=2), b: wp.array(dtype=wp32, ndim=2)):
1012
+ tid = wp.tid()
2618
1013
 
2619
- assert_np_equal(mins.numpy(), 2 * np.minimum(a.numpy(), b.numpy()), tol=tol)
2620
- assert_np_equal(maxs.numpy(), 2 * np.maximum(a.numpy(), b.numpy()), tol=tol)
1014
+ v1 = vec3(a[tid, 0], a[tid, 1], a[tid, 2])
1015
+ v2 = wp.vector(v1, dtype=wp32)
2621
1016
 
2622
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2623
- if dtype in np_float_types:
2624
- for i in range(10):
2625
- for j in range(14):
2626
- tape = wp.Tape()
2627
- with tape:
2628
- wp.launch(kernel, dim=1, inputs=[a, b], outputs=[mins, maxs], device=device)
2629
- wp.launch(output_select_kernel, dim=1, inputs=[mins, i, j], outputs=[out], device=device)
2630
-
2631
- tape.backward(loss=out)
2632
- expected = np.zeros_like(a.numpy())
2633
- expected[i, j] = 2 if (a.numpy()[i, j] < b.numpy()[i, j]) else 0
2634
- assert_np_equal(tape.gradients[a].numpy(), expected, tol=tol)
2635
- expected[i, j] = 2 if (b.numpy()[i, j] < a.numpy()[i, j]) else 0
2636
- assert_np_equal(tape.gradients[b].numpy(), expected, tol=tol)
2637
- tape.zero()
2638
-
2639
- tape = wp.Tape()
2640
- with tape:
2641
- wp.launch(kernel, dim=1, inputs=[a, b], outputs=[mins, maxs], device=device)
2642
- wp.launch(output_select_kernel, dim=1, inputs=[maxs, i, j], outputs=[out], device=device)
2643
-
2644
- tape.backward(loss=out)
2645
- expected = np.zeros_like(a.numpy())
2646
- expected[i, j] = 2 if (a.numpy()[i, j] > b.numpy()[i, j]) else 0
2647
- assert_np_equal(tape.gradients[a].numpy(), expected, tol=tol)
2648
- expected[i, j] = 2 if (b.numpy()[i, j] > a.numpy()[i, j]) else 0
2649
- assert_np_equal(tape.gradients[b].numpy(), expected, tol=tol)
2650
- tape.zero()
2651
-
2652
-
2653
- def test_equivalent_types(test, device, dtype, register_kernels=False):
2654
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1017
+ b[tid, 0] = v2[0]
1018
+ b[tid, 1] = v2[1]
1019
+ b[tid, 2] = v2[2]
2655
1020
 
2656
- # vector types
2657
- vec2 = wp.types.vector(length=2, dtype=wptype)
2658
- vec3 = wp.types.vector(length=3, dtype=wptype)
2659
- vec4 = wp.types.vector(length=4, dtype=wptype)
2660
- vec5 = wp.types.vector(length=5, dtype=wptype)
1021
+ def cast_float64(a: wp.array(dtype=wp_type, ndim=2), b: wp.array(dtype=wp64, ndim=2)):
1022
+ tid = wp.tid()
2661
1023
 
2662
- # vector types equivalent to the above
2663
- vec2_equiv = wp.types.vector(length=2, dtype=wptype)
2664
- vec3_equiv = wp.types.vector(length=3, dtype=wptype)
2665
- vec4_equiv = wp.types.vector(length=4, dtype=wptype)
2666
- vec5_equiv = wp.types.vector(length=5, dtype=wptype)
2667
-
2668
- # declare kernel with original types
2669
- def check_equivalence(
2670
- v2: vec2,
2671
- v3: vec3,
2672
- v4: vec4,
2673
- v5: vec5,
2674
- ):
2675
- wp.expect_eq(v2, vec2(wptype(1), wptype(2)))
2676
- wp.expect_eq(v3, vec3(wptype(1), wptype(2), wptype(3)))
2677
- wp.expect_eq(v4, vec4(wptype(1), wptype(2), wptype(3), wptype(4)))
2678
- wp.expect_eq(v5, vec5(wptype(1), wptype(2), wptype(3), wptype(4), wptype(5)))
1024
+ v1 = vec3(a[tid, 0], a[tid, 1], a[tid, 2])
1025
+ v2 = wp.vector(v1, dtype=wp64)
2679
1026
 
2680
- wp.expect_eq(v2, vec2_equiv(wptype(1), wptype(2)))
2681
- wp.expect_eq(v3, vec3_equiv(wptype(1), wptype(2), wptype(3)))
2682
- wp.expect_eq(v4, vec4_equiv(wptype(1), wptype(2), wptype(3), wptype(4)))
2683
- wp.expect_eq(v5, vec5_equiv(wptype(1), wptype(2), wptype(3), wptype(4), wptype(5)))
1027
+ b[tid, 0] = v2[0]
1028
+ b[tid, 1] = v2[1]
1029
+ b[tid, 2] = v2[2]
2684
1030
 
2685
- kernel = getkernel(check_equivalence, suffix=dtype.__name__)
1031
+ kernel_16 = getkernel(cast_float16, suffix=dtype.__name__)
1032
+ kernel_32 = getkernel(cast_float32, suffix=dtype.__name__)
1033
+ kernel_64 = getkernel(cast_float64, suffix=dtype.__name__)
2686
1034
 
2687
1035
  if register_kernels:
2688
1036
  return
2689
1037
 
2690
- # call kernel with equivalent types
2691
- v2 = vec2_equiv(1, 2)
2692
- v3 = vec3_equiv(1, 2, 3)
2693
- v4 = vec4_equiv(1, 2, 3, 4)
2694
- v5 = vec5_equiv(1, 2, 3, 4, 5)
1038
+ # check casting to float 16
1039
+ a = wp.array(np.ones((1, 3), dtype=np_type), dtype=wp_type, requires_grad=True, device=device)
1040
+ b = wp.array(np.zeros((1, 3), dtype=np16), dtype=wp16, requires_grad=True, device=device)
1041
+ b_result = np.ones((1, 3), dtype=np16)
1042
+ b_grad = wp.array(np.ones((1, 3), dtype=np16), dtype=wp16, device=device)
1043
+ a_grad = wp.array(np.ones((1, 3), dtype=np_type), dtype=wp_type, device=device)
2695
1044
 
2696
- wp.launch(kernel, dim=1, inputs=[v2, v3, v4, v5], device=device)
1045
+ tape = wp.Tape()
1046
+ with tape:
1047
+ wp.launch(kernel=kernel_16, dim=1, inputs=[a, b], device=device)
2697
1048
 
1049
+ tape.backward(grads={b: b_grad})
1050
+ out = tape.gradients[a].numpy()
2698
1051
 
2699
- def test_conversions(test, device, dtype, register_kernels=False):
2700
- def check_vectors_equal(
2701
- v0: wp.vec3,
2702
- v1: wp.vec3,
2703
- v2: wp.vec3,
2704
- v3: wp.vec3,
2705
- ):
2706
- wp.expect_eq(v1, v0)
2707
- wp.expect_eq(v2, v0)
2708
- wp.expect_eq(v3, v0)
1052
+ assert_np_equal(b.numpy(), b_result)
1053
+ assert_np_equal(out, a_grad.numpy())
2709
1054
 
2710
- kernel = getkernel(check_vectors_equal, suffix=dtype.__name__)
1055
+ # check casting to float 32
1056
+ a = wp.array(np.ones((1, 3), dtype=np_type), dtype=wp_type, requires_grad=True, device=device)
1057
+ b = wp.array(np.zeros((1, 3), dtype=np32), dtype=wp32, requires_grad=True, device=device)
1058
+ b_result = np.ones((1, 3), dtype=np32)
1059
+ b_grad = wp.array(np.ones((1, 3), dtype=np32), dtype=wp32, device=device)
1060
+ a_grad = wp.array(np.ones((1, 3), dtype=np_type), dtype=wp_type, device=device)
2711
1061
 
2712
- if register_kernels:
2713
- return
1062
+ tape = wp.Tape()
1063
+ with tape:
1064
+ wp.launch(kernel=kernel_32, dim=1, inputs=[a, b], device=device)
2714
1065
 
2715
- v0 = wp.vec3(1, 2, 3)
1066
+ tape.backward(grads={b: b_grad})
1067
+ out = tape.gradients[a].numpy()
2716
1068
 
2717
- # test explicit conversions - constructing vectors from different containers
2718
- v1 = wp.vec3((1, 2, 3))
2719
- v2 = wp.vec3([1, 2, 3])
2720
- v3 = wp.vec3(np.array([1, 2, 3], dtype=dtype))
1069
+ assert_np_equal(b.numpy(), b_result)
1070
+ assert_np_equal(out, a_grad.numpy())
2721
1071
 
2722
- wp.launch(kernel, dim=1, inputs=[v0, v1, v2, v3], device=device)
1072
+ # check casting to float 64
1073
+ a = wp.array(np.ones((1, 3), dtype=np_type), dtype=wp_type, requires_grad=True, device=device)
1074
+ b = wp.array(np.zeros((1, 3), dtype=np64), dtype=wp64, requires_grad=True, device=device)
1075
+ b_result = np.ones((1, 3), dtype=np64)
1076
+ b_grad = wp.array(np.ones((1, 3), dtype=np64), dtype=wp64, device=device)
1077
+ a_grad = wp.array(np.ones((1, 3), dtype=np_type), dtype=wp_type, device=device)
2723
1078
 
2724
- # test implicit conversions - passing different containers as vectors to wp.launch()
2725
- v1 = (1, 2, 3)
2726
- v2 = [1, 2, 3]
2727
- v3 = np.array([1, 2, 3], dtype=dtype)
1079
+ tape = wp.Tape()
1080
+ with tape:
1081
+ wp.launch(kernel=kernel_64, dim=1, inputs=[a, b], device=device)
1082
+
1083
+ tape.backward(grads={b: b_grad})
1084
+ out = tape.gradients[a].numpy()
1085
+
1086
+ assert_np_equal(b.numpy(), b_result)
1087
+ assert_np_equal(out, a_grad.numpy())
2728
1088
 
2729
- wp.launch(kernel, dim=1, inputs=[v0, v1, v2, v3], device=device)
1089
+
1090
+ @wp.kernel
1091
+ def test_vector_constructor_value_func():
1092
+ a = wp.vec2()
1093
+ b = wp.vector(a, dtype=wp.float16)
1094
+ c = wp.vector(a)
1095
+ d = wp.vector(a, length=2)
2730
1096
 
2731
1097
 
2732
1098
  # Test matrix constructors using explicit type (float16)
@@ -2790,112 +1156,113 @@ def test_constructors_constant_length():
2790
1156
  v[i] = float(i)
2791
1157
 
2792
1158
 
2793
- def register(parent):
2794
- devices = get_test_devices()
1159
+ devices = get_test_devices()
2795
1160
 
2796
- class TestVec(parent):
2797
- pass
2798
1161
 
2799
- add_kernel_test(TestVec, test_constructors_explicit_precision, dim=1, devices=devices)
2800
- add_kernel_test(TestVec, test_constructors_default_precision, dim=1, devices=devices)
2801
- add_kernel_test(TestVec, test_constructors_constant_length, dim=1, devices=devices)
1162
+ class TestVec(unittest.TestCase):
1163
+ pass
2802
1164
 
2803
- vec10 = wp.types.vector(length=10, dtype=float)
2804
- add_kernel_test(
2805
- TestVec,
2806
- test_vector_mutation,
2807
- dim=1,
2808
- inputs=[vec10(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0)],
2809
- devices=devices,
2810
- )
2811
1165
 
2812
- for dtype in np_unsigned_int_types:
2813
- add_function_test_register_kernel(
2814
- TestVec,
2815
- f"test_subtraction_unsigned_{dtype.__name__}",
2816
- test_subtraction_unsigned,
2817
- devices=devices,
2818
- dtype=dtype,
2819
- )
1166
+ add_kernel_test(TestVec, test_vector_constructor_value_func, dim=1, devices=devices)
1167
+ add_kernel_test(TestVec, test_constructors_explicit_precision, dim=1, devices=devices)
1168
+ add_kernel_test(TestVec, test_constructors_default_precision, dim=1, devices=devices)
1169
+ add_kernel_test(TestVec, test_constructors_constant_length, dim=1, devices=devices)
2820
1170
 
2821
- for dtype in np_signed_int_types + np_float_types:
2822
- add_function_test_register_kernel(
2823
- TestVec, f"test_negation_{dtype.__name__}", test_negation, devices=devices, dtype=dtype
2824
- )
2825
- add_function_test_register_kernel(
2826
- TestVec, f"test_subtraction_{dtype.__name__}", test_subtraction, devices=devices, dtype=dtype
2827
- )
1171
+ vec10 = wp.types.vector(length=10, dtype=float)
1172
+ add_kernel_test(
1173
+ TestVec,
1174
+ test_vector_mutation,
1175
+ dim=1,
1176
+ inputs=[vec10(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0)],
1177
+ devices=devices,
1178
+ )
2828
1179
 
2829
- for dtype in np_float_types:
2830
- add_function_test_register_kernel(
2831
- TestVec, f"test_crossproduct_{dtype.__name__}", test_crossproduct, devices=devices, dtype=dtype
2832
- )
2833
- add_function_test_register_kernel(
2834
- TestVec, f"test_length_{dtype.__name__}", test_length, devices=devices, dtype=dtype
2835
- )
2836
- add_function_test_register_kernel(
2837
- TestVec, f"test_normalize_{dtype.__name__}", test_normalize, devices=devices, dtype=dtype
2838
- )
1180
+ for dtype in np_unsigned_int_types:
1181
+ add_function_test_register_kernel(
1182
+ TestVec,
1183
+ f"test_subtraction_unsigned_{dtype.__name__}",
1184
+ test_subtraction_unsigned,
1185
+ devices=devices,
1186
+ dtype=dtype,
1187
+ )
2839
1188
 
2840
- for dtype in np_scalar_types:
2841
- add_function_test(TestVec, f"test_arrays_{dtype.__name__}", test_arrays, devices=devices, dtype=dtype)
2842
- add_function_test_register_kernel(
2843
- TestVec, f"test_constructors_{dtype.__name__}", test_constructors, devices=devices, dtype=dtype
2844
- )
2845
- add_function_test_register_kernel(
2846
- TestVec, f"test_anon_type_instance_{dtype.__name__}", test_anon_type_instance, devices=devices, dtype=dtype
2847
- )
2848
- add_function_test_register_kernel(
2849
- TestVec, f"test_indexing_{dtype.__name__}", test_indexing, devices=devices, dtype=dtype
2850
- )
2851
- add_function_test_register_kernel(
2852
- TestVec, f"test_equality_{dtype.__name__}", test_equality, devices=devices, dtype=dtype
2853
- )
2854
- add_function_test_register_kernel(
2855
- TestVec,
2856
- f"test_scalar_multiplication_{dtype.__name__}",
2857
- test_scalar_multiplication,
2858
- devices=devices,
2859
- dtype=dtype,
2860
- )
2861
- add_function_test_register_kernel(
2862
- TestVec,
2863
- f"test_scalar_multiplication_rightmul_{dtype.__name__}",
2864
- test_scalar_multiplication_rightmul,
2865
- devices=devices,
2866
- dtype=dtype,
2867
- )
2868
- add_function_test_register_kernel(
2869
- TestVec, f"test_cw_multiplication_{dtype.__name__}", test_cw_multiplication, devices=devices, dtype=dtype
2870
- )
2871
- add_function_test_register_kernel(
2872
- TestVec, f"test_scalar_division_{dtype.__name__}", test_scalar_division, devices=devices, dtype=dtype
2873
- )
2874
- add_function_test_register_kernel(
2875
- TestVec, f"test_cw_division_{dtype.__name__}", test_cw_division, devices=devices, dtype=dtype
2876
- )
2877
- add_function_test_register_kernel(
2878
- TestVec, f"test_addition_{dtype.__name__}", test_addition, devices=devices, dtype=dtype
2879
- )
2880
- add_function_test_register_kernel(
2881
- TestVec, f"test_dotproduct_{dtype.__name__}", test_dotproduct, devices=devices, dtype=dtype
2882
- )
2883
- add_function_test_register_kernel(
2884
- TestVec, f"test_equivalent_types_{dtype.__name__}", test_equivalent_types, devices=devices, dtype=dtype
2885
- )
2886
- add_function_test_register_kernel(
2887
- TestVec, f"test_conversions_{dtype.__name__}", test_conversions, devices=devices, dtype=dtype
2888
- )
2889
- add_function_test_register_kernel(
2890
- TestVec, f"test_constants_{dtype.__name__}", test_constants, devices=devices, dtype=dtype
2891
- )
1189
+ for dtype in np_signed_int_types + np_float_types:
1190
+ add_function_test_register_kernel(
1191
+ TestVec, f"test_negation_{dtype.__name__}", test_negation, devices=devices, dtype=dtype
1192
+ )
1193
+ add_function_test_register_kernel(
1194
+ TestVec, f"test_subtraction_{dtype.__name__}", test_subtraction, devices=devices, dtype=dtype
1195
+ )
2892
1196
 
2893
- # the kernels in this test compile incredibly slowly...
2894
- # add_function_test_register_kernel(TestVec, f"test_minmax_{dtype.__name__}", test_minmax, devices=devices, dtype=dtype)
1197
+ for dtype in np_float_types:
1198
+ add_function_test_register_kernel(
1199
+ TestVec, f"test_crossproduct_{dtype.__name__}", test_crossproduct, devices=devices, dtype=dtype
1200
+ )
1201
+ add_function_test_register_kernel(
1202
+ TestVec, f"test_length_{dtype.__name__}", test_length, devices=devices, dtype=dtype
1203
+ )
1204
+ add_function_test_register_kernel(
1205
+ TestVec, f"test_normalize_{dtype.__name__}", test_normalize, devices=devices, dtype=dtype
1206
+ )
1207
+ add_function_test_register_kernel(
1208
+ TestVec,
1209
+ f"test_casting_constructors_{dtype.__name__}",
1210
+ test_casting_constructors,
1211
+ devices=devices,
1212
+ dtype=dtype,
1213
+ )
2895
1214
 
2896
- return TestVec
1215
+ add_function_test(
1216
+ TestVec,
1217
+ "test_anon_constructor_error_dtype_keyword_missing",
1218
+ test_anon_constructor_error_dtype_keyword_missing,
1219
+ devices=devices,
1220
+ )
1221
+ add_function_test(
1222
+ TestVec,
1223
+ "test_anon_constructor_error_length_mismatch",
1224
+ test_anon_constructor_error_length_mismatch,
1225
+ devices=devices,
1226
+ )
1227
+ add_function_test(
1228
+ TestVec,
1229
+ "test_anon_constructor_error_numeric_arg_missing_1",
1230
+ test_anon_constructor_error_numeric_arg_missing_1,
1231
+ devices=devices,
1232
+ )
1233
+ add_function_test(
1234
+ TestVec,
1235
+ "test_anon_constructor_error_numeric_arg_missing_2",
1236
+ test_anon_constructor_error_numeric_arg_missing_2,
1237
+ devices=devices,
1238
+ )
1239
+ add_function_test(
1240
+ TestVec,
1241
+ "test_anon_constructor_error_dtype_keyword_extraneous",
1242
+ test_anon_constructor_error_dtype_keyword_extraneous,
1243
+ devices=devices,
1244
+ )
1245
+ add_function_test(
1246
+ TestVec,
1247
+ "test_anon_constructor_error_numeric_args_mismatch",
1248
+ test_anon_constructor_error_numeric_args_mismatch,
1249
+ devices=devices,
1250
+ )
1251
+ add_function_test(
1252
+ TestVec,
1253
+ "test_tpl_constructor_error_incompatible_sizes",
1254
+ test_tpl_constructor_error_incompatible_sizes,
1255
+ devices=devices,
1256
+ )
1257
+ add_function_test(
1258
+ TestVec,
1259
+ "test_tpl_constructor_error_numeric_args_mismatch",
1260
+ test_tpl_constructor_error_numeric_args_mismatch,
1261
+ devices=devices,
1262
+ )
1263
+ add_function_test(TestVec, "test_tpl_ops_with_anon", test_tpl_ops_with_anon)
2897
1264
 
2898
1265
 
2899
1266
  if __name__ == "__main__":
2900
- c = register(unittest.TestCase)
1267
+ wp.build.clear_kernel_cache()
2901
1268
  unittest.main(verbosity=2, failfast=True)