warp-lang 1.0.1-py3-none-macosx_10_13_universal2.whl → 1.1.0-py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic.

Files changed (346)
  1. warp/__init__.py +108 -97
  2. warp/__init__.pyi +1 -1
  3. warp/bin/libwarp-clang.dylib +0 -0
  4. warp/bin/libwarp.dylib +0 -0
  5. warp/build.py +115 -113
  6. warp/build_dll.py +383 -375
  7. warp/builtins.py +3425 -3354
  8. warp/codegen.py +2878 -2792
  9. warp/config.py +40 -36
  10. warp/constants.py +45 -45
  11. warp/context.py +5194 -5102
  12. warp/dlpack.py +442 -442
  13. warp/examples/__init__.py +16 -16
  14. warp/examples/assets/bear.usd +0 -0
  15. warp/examples/assets/bunny.usd +0 -0
  16. warp/examples/assets/cartpole.urdf +110 -110
  17. warp/examples/assets/crazyflie.usd +0 -0
  18. warp/examples/assets/cube.usd +0 -0
  19. warp/examples/assets/nv_ant.xml +92 -92
  20. warp/examples/assets/nv_humanoid.xml +183 -183
  21. warp/examples/assets/quadruped.urdf +267 -267
  22. warp/examples/assets/rocks.nvdb +0 -0
  23. warp/examples/assets/rocks.usd +0 -0
  24. warp/examples/assets/sphere.usd +0 -0
  25. warp/examples/benchmarks/benchmark_api.py +383 -383
  26. warp/examples/benchmarks/benchmark_cloth.py +278 -279
  27. warp/examples/benchmarks/benchmark_cloth_cupy.py +88 -88
  28. warp/examples/benchmarks/benchmark_cloth_jax.py +97 -100
  29. warp/examples/benchmarks/benchmark_cloth_numba.py +146 -142
  30. warp/examples/benchmarks/benchmark_cloth_numpy.py +77 -77
  31. warp/examples/benchmarks/benchmark_cloth_pytorch.py +86 -86
  32. warp/examples/benchmarks/benchmark_cloth_taichi.py +112 -112
  33. warp/examples/benchmarks/benchmark_cloth_warp.py +146 -146
  34. warp/examples/benchmarks/benchmark_launches.py +295 -295
  35. warp/examples/browse.py +29 -28
  36. warp/examples/core/example_dem.py +234 -221
  37. warp/examples/core/example_fluid.py +293 -267
  38. warp/examples/core/example_graph_capture.py +144 -129
  39. warp/examples/core/example_marching_cubes.py +188 -176
  40. warp/examples/core/example_mesh.py +174 -154
  41. warp/examples/core/example_mesh_intersect.py +205 -193
  42. warp/examples/core/example_nvdb.py +176 -169
  43. warp/examples/core/example_raycast.py +105 -89
  44. warp/examples/core/example_raymarch.py +199 -178
  45. warp/examples/core/example_render_opengl.py +185 -141
  46. warp/examples/core/example_sph.py +405 -389
  47. warp/examples/core/example_torch.py +222 -181
  48. warp/examples/core/example_wave.py +263 -249
  49. warp/examples/fem/bsr_utils.py +378 -380
  50. warp/examples/fem/example_apic_fluid.py +407 -391
  51. warp/examples/fem/example_convection_diffusion.py +182 -168
  52. warp/examples/fem/example_convection_diffusion_dg.py +219 -209
  53. warp/examples/fem/example_convection_diffusion_dg0.py +204 -194
  54. warp/examples/fem/example_deformed_geometry.py +177 -159
  55. warp/examples/fem/example_diffusion.py +201 -173
  56. warp/examples/fem/example_diffusion_3d.py +177 -152
  57. warp/examples/fem/example_diffusion_mgpu.py +221 -214
  58. warp/examples/fem/example_mixed_elasticity.py +244 -222
  59. warp/examples/fem/example_navier_stokes.py +259 -243
  60. warp/examples/fem/example_stokes.py +220 -192
  61. warp/examples/fem/example_stokes_transfer.py +265 -249
  62. warp/examples/fem/mesh_utils.py +133 -109
  63. warp/examples/fem/plot_utils.py +292 -287
  64. warp/examples/optim/example_bounce.py +260 -248
  65. warp/examples/optim/example_cloth_throw.py +222 -210
  66. warp/examples/optim/example_diffray.py +566 -535
  67. warp/examples/optim/example_drone.py +864 -835
  68. warp/examples/optim/example_inverse_kinematics.py +176 -169
  69. warp/examples/optim/example_inverse_kinematics_torch.py +185 -170
  70. warp/examples/optim/example_spring_cage.py +239 -234
  71. warp/examples/optim/example_trajectory.py +223 -201
  72. warp/examples/optim/example_walker.py +306 -292
  73. warp/examples/sim/example_cartpole.py +139 -128
  74. warp/examples/sim/example_cloth.py +196 -184
  75. warp/examples/sim/example_granular.py +124 -113
  76. warp/examples/sim/example_granular_collision_sdf.py +197 -185
  77. warp/examples/sim/example_jacobian_ik.py +236 -213
  78. warp/examples/sim/example_particle_chain.py +118 -106
  79. warp/examples/sim/example_quadruped.py +193 -179
  80. warp/examples/sim/example_rigid_chain.py +197 -189
  81. warp/examples/sim/example_rigid_contact.py +189 -176
  82. warp/examples/sim/example_rigid_force.py +127 -126
  83. warp/examples/sim/example_rigid_gyroscopic.py +109 -97
  84. warp/examples/sim/example_rigid_soft_contact.py +134 -124
  85. warp/examples/sim/example_soft_body.py +190 -178
  86. warp/fabric.py +337 -335
  87. warp/fem/__init__.py +60 -27
  88. warp/fem/cache.py +401 -388
  89. warp/fem/dirichlet.py +178 -179
  90. warp/fem/domain.py +262 -263
  91. warp/fem/field/__init__.py +100 -101
  92. warp/fem/field/field.py +148 -149
  93. warp/fem/field/nodal_field.py +298 -299
  94. warp/fem/field/restriction.py +22 -21
  95. warp/fem/field/test.py +180 -181
  96. warp/fem/field/trial.py +183 -183
  97. warp/fem/geometry/__init__.py +15 -19
  98. warp/fem/geometry/closest_point.py +69 -70
  99. warp/fem/geometry/deformed_geometry.py +270 -271
  100. warp/fem/geometry/element.py +744 -744
  101. warp/fem/geometry/geometry.py +184 -186
  102. warp/fem/geometry/grid_2d.py +380 -373
  103. warp/fem/geometry/grid_3d.py +441 -435
  104. warp/fem/geometry/hexmesh.py +953 -953
  105. warp/fem/geometry/partition.py +374 -376
  106. warp/fem/geometry/quadmesh_2d.py +532 -532
  107. warp/fem/geometry/tetmesh.py +840 -840
  108. warp/fem/geometry/trimesh_2d.py +577 -577
  109. warp/fem/integrate.py +1630 -1615
  110. warp/fem/operator.py +190 -191
  111. warp/fem/polynomial.py +214 -213
  112. warp/fem/quadrature/__init__.py +2 -2
  113. warp/fem/quadrature/pic_quadrature.py +243 -245
  114. warp/fem/quadrature/quadrature.py +295 -294
  115. warp/fem/space/__init__.py +294 -292
  116. warp/fem/space/basis_space.py +488 -489
  117. warp/fem/space/collocated_function_space.py +100 -105
  118. warp/fem/space/dof_mapper.py +236 -236
  119. warp/fem/space/function_space.py +148 -145
  120. warp/fem/space/grid_2d_function_space.py +267 -267
  121. warp/fem/space/grid_3d_function_space.py +305 -306
  122. warp/fem/space/hexmesh_function_space.py +350 -352
  123. warp/fem/space/partition.py +350 -350
  124. warp/fem/space/quadmesh_2d_function_space.py +368 -369
  125. warp/fem/space/restriction.py +158 -160
  126. warp/fem/space/shape/__init__.py +13 -15
  127. warp/fem/space/shape/cube_shape_function.py +738 -738
  128. warp/fem/space/shape/shape_function.py +102 -103
  129. warp/fem/space/shape/square_shape_function.py +611 -611
  130. warp/fem/space/shape/tet_shape_function.py +565 -567
  131. warp/fem/space/shape/triangle_shape_function.py +429 -429
  132. warp/fem/space/tetmesh_function_space.py +294 -292
  133. warp/fem/space/topology.py +297 -295
  134. warp/fem/space/trimesh_2d_function_space.py +223 -221
  135. warp/fem/types.py +77 -77
  136. warp/fem/utils.py +495 -495
  137. warp/jax.py +166 -141
  138. warp/jax_experimental.py +341 -339
  139. warp/native/array.h +1072 -1025
  140. warp/native/builtin.h +1560 -1560
  141. warp/native/bvh.cpp +398 -398
  142. warp/native/bvh.cu +525 -525
  143. warp/native/bvh.h +429 -429
  144. warp/native/clang/clang.cpp +495 -464
  145. warp/native/crt.cpp +31 -31
  146. warp/native/crt.h +334 -334
  147. warp/native/cuda_crt.h +1049 -1049
  148. warp/native/cuda_util.cpp +549 -540
  149. warp/native/cuda_util.h +288 -203
  150. warp/native/cutlass_gemm.cpp +34 -34
  151. warp/native/cutlass_gemm.cu +372 -372
  152. warp/native/error.cpp +66 -66
  153. warp/native/error.h +27 -27
  154. warp/native/fabric.h +228 -228
  155. warp/native/hashgrid.cpp +301 -278
  156. warp/native/hashgrid.cu +78 -77
  157. warp/native/hashgrid.h +227 -227
  158. warp/native/initializer_array.h +32 -32
  159. warp/native/intersect.h +1204 -1204
  160. warp/native/intersect_adj.h +365 -365
  161. warp/native/intersect_tri.h +322 -322
  162. warp/native/marching.cpp +2 -2
  163. warp/native/marching.cu +497 -497
  164. warp/native/marching.h +2 -2
  165. warp/native/mat.h +1498 -1498
  166. warp/native/matnn.h +333 -333
  167. warp/native/mesh.cpp +203 -203
  168. warp/native/mesh.cu +293 -293
  169. warp/native/mesh.h +1887 -1887
  170. warp/native/nanovdb/NanoVDB.h +4782 -4782
  171. warp/native/nanovdb/PNanoVDB.h +2553 -2553
  172. warp/native/nanovdb/PNanoVDBWrite.h +294 -294
  173. warp/native/noise.h +850 -850
  174. warp/native/quat.h +1084 -1084
  175. warp/native/rand.h +299 -299
  176. warp/native/range.h +108 -108
  177. warp/native/reduce.cpp +156 -156
  178. warp/native/reduce.cu +348 -348
  179. warp/native/runlength_encode.cpp +61 -61
  180. warp/native/runlength_encode.cu +46 -46
  181. warp/native/scan.cpp +30 -30
  182. warp/native/scan.cu +36 -36
  183. warp/native/scan.h +7 -7
  184. warp/native/solid_angle.h +442 -442
  185. warp/native/sort.cpp +94 -94
  186. warp/native/sort.cu +97 -97
  187. warp/native/sort.h +14 -14
  188. warp/native/sparse.cpp +337 -337
  189. warp/native/sparse.cu +544 -544
  190. warp/native/spatial.h +630 -630
  191. warp/native/svd.h +562 -562
  192. warp/native/temp_buffer.h +30 -30
  193. warp/native/vec.h +1132 -1132
  194. warp/native/volume.cpp +297 -297
  195. warp/native/volume.cu +32 -32
  196. warp/native/volume.h +538 -538
  197. warp/native/volume_builder.cu +425 -425
  198. warp/native/volume_builder.h +19 -19
  199. warp/native/warp.cpp +1057 -1052
  200. warp/native/warp.cu +2943 -2828
  201. warp/native/warp.h +313 -305
  202. warp/optim/__init__.py +9 -9
  203. warp/optim/adam.py +120 -120
  204. warp/optim/linear.py +1104 -939
  205. warp/optim/sgd.py +104 -92
  206. warp/render/__init__.py +10 -10
  207. warp/render/render_opengl.py +3217 -3204
  208. warp/render/render_usd.py +768 -749
  209. warp/render/utils.py +152 -150
  210. warp/sim/__init__.py +52 -59
  211. warp/sim/articulation.py +685 -685
  212. warp/sim/collide.py +1594 -1590
  213. warp/sim/import_mjcf.py +489 -481
  214. warp/sim/import_snu.py +220 -221
  215. warp/sim/import_urdf.py +536 -516
  216. warp/sim/import_usd.py +887 -881
  217. warp/sim/inertia.py +316 -317
  218. warp/sim/integrator.py +234 -233
  219. warp/sim/integrator_euler.py +1956 -1956
  220. warp/sim/integrator_featherstone.py +1910 -1991
  221. warp/sim/integrator_xpbd.py +3294 -3312
  222. warp/sim/model.py +4473 -4314
  223. warp/sim/particles.py +113 -112
  224. warp/sim/render.py +417 -403
  225. warp/sim/utils.py +413 -410
  226. warp/sparse.py +1227 -1227
  227. warp/stubs.py +2109 -2469
  228. warp/tape.py +1162 -225
  229. warp/tests/__init__.py +1 -1
  230. warp/tests/__main__.py +4 -4
  231. warp/tests/assets/torus.usda +105 -105
  232. warp/tests/aux_test_class_kernel.py +26 -26
  233. warp/tests/aux_test_compile_consts_dummy.py +10 -10
  234. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -21
  235. warp/tests/aux_test_dependent.py +22 -22
  236. warp/tests/aux_test_grad_customs.py +23 -23
  237. warp/tests/aux_test_reference.py +11 -11
  238. warp/tests/aux_test_reference_reference.py +10 -10
  239. warp/tests/aux_test_square.py +17 -17
  240. warp/tests/aux_test_unresolved_func.py +14 -14
  241. warp/tests/aux_test_unresolved_symbol.py +14 -14
  242. warp/tests/disabled_kinematics.py +239 -239
  243. warp/tests/run_coverage_serial.py +31 -31
  244. warp/tests/test_adam.py +157 -157
  245. warp/tests/test_arithmetic.py +1124 -1124
  246. warp/tests/test_array.py +2417 -2326
  247. warp/tests/test_array_reduce.py +150 -150
  248. warp/tests/test_async.py +668 -656
  249. warp/tests/test_atomic.py +141 -141
  250. warp/tests/test_bool.py +204 -149
  251. warp/tests/test_builtins_resolution.py +1292 -1292
  252. warp/tests/test_bvh.py +164 -171
  253. warp/tests/test_closest_point_edge_edge.py +228 -228
  254. warp/tests/test_codegen.py +566 -553
  255. warp/tests/test_compile_consts.py +97 -101
  256. warp/tests/test_conditional.py +246 -246
  257. warp/tests/test_copy.py +232 -215
  258. warp/tests/test_ctypes.py +632 -632
  259. warp/tests/test_dense.py +67 -67
  260. warp/tests/test_devices.py +91 -98
  261. warp/tests/test_dlpack.py +530 -529
  262. warp/tests/test_examples.py +400 -378
  263. warp/tests/test_fabricarray.py +955 -955
  264. warp/tests/test_fast_math.py +62 -54
  265. warp/tests/test_fem.py +1277 -1278
  266. warp/tests/test_fp16.py +130 -130
  267. warp/tests/test_func.py +338 -337
  268. warp/tests/test_generics.py +571 -571
  269. warp/tests/test_grad.py +746 -640
  270. warp/tests/test_grad_customs.py +333 -336
  271. warp/tests/test_hash_grid.py +210 -164
  272. warp/tests/test_import.py +39 -39
  273. warp/tests/test_indexedarray.py +1134 -1134
  274. warp/tests/test_intersect.py +67 -67
  275. warp/tests/test_jax.py +307 -307
  276. warp/tests/test_large.py +167 -164
  277. warp/tests/test_launch.py +354 -354
  278. warp/tests/test_lerp.py +261 -261
  279. warp/tests/test_linear_solvers.py +191 -171
  280. warp/tests/test_lvalue.py +421 -493
  281. warp/tests/test_marching_cubes.py +65 -65
  282. warp/tests/test_mat.py +1801 -1827
  283. warp/tests/test_mat_lite.py +115 -115
  284. warp/tests/test_mat_scalar_ops.py +2907 -2889
  285. warp/tests/test_math.py +126 -193
  286. warp/tests/test_matmul.py +500 -499
  287. warp/tests/test_matmul_lite.py +410 -410
  288. warp/tests/test_mempool.py +188 -190
  289. warp/tests/test_mesh.py +284 -324
  290. warp/tests/test_mesh_query_aabb.py +228 -241
  291. warp/tests/test_mesh_query_point.py +692 -702
  292. warp/tests/test_mesh_query_ray.py +292 -303
  293. warp/tests/test_mlp.py +276 -276
  294. warp/tests/test_model.py +110 -110
  295. warp/tests/test_modules_lite.py +39 -39
  296. warp/tests/test_multigpu.py +163 -163
  297. warp/tests/test_noise.py +248 -248
  298. warp/tests/test_operators.py +250 -250
  299. warp/tests/test_options.py +123 -125
  300. warp/tests/test_peer.py +133 -137
  301. warp/tests/test_pinned.py +78 -78
  302. warp/tests/test_print.py +54 -54
  303. warp/tests/test_quat.py +2086 -2086
  304. warp/tests/test_rand.py +288 -288
  305. warp/tests/test_reload.py +217 -217
  306. warp/tests/test_rounding.py +179 -179
  307. warp/tests/test_runlength_encode.py +190 -190
  308. warp/tests/test_sim_grad.py +243 -0
  309. warp/tests/test_sim_kinematics.py +91 -97
  310. warp/tests/test_smoothstep.py +168 -168
  311. warp/tests/test_snippet.py +305 -266
  312. warp/tests/test_sparse.py +468 -460
  313. warp/tests/test_spatial.py +2148 -2148
  314. warp/tests/test_streams.py +486 -473
  315. warp/tests/test_struct.py +710 -675
  316. warp/tests/test_tape.py +173 -148
  317. warp/tests/test_torch.py +743 -743
  318. warp/tests/test_transient_module.py +87 -87
  319. warp/tests/test_types.py +556 -659
  320. warp/tests/test_utils.py +490 -499
  321. warp/tests/test_vec.py +1264 -1268
  322. warp/tests/test_vec_lite.py +73 -73
  323. warp/tests/test_vec_scalar_ops.py +2099 -2099
  324. warp/tests/test_verify_fp.py +94 -94
  325. warp/tests/test_volume.py +737 -736
  326. warp/tests/test_volume_write.py +255 -265
  327. warp/tests/unittest_serial.py +37 -37
  328. warp/tests/unittest_suites.py +363 -359
  329. warp/tests/unittest_utils.py +603 -578
  330. warp/tests/unused_test_misc.py +71 -71
  331. warp/tests/walkthrough_debug.py +85 -85
  332. warp/thirdparty/appdirs.py +598 -598
  333. warp/thirdparty/dlpack.py +143 -143
  334. warp/thirdparty/unittest_parallel.py +566 -561
  335. warp/torch.py +321 -295
  336. warp/types.py +4504 -4450
  337. warp/utils.py +1008 -821
  338. {warp_lang-1.0.1.dist-info → warp_lang-1.1.0.dist-info}/LICENSE.md +126 -126
  339. {warp_lang-1.0.1.dist-info → warp_lang-1.1.0.dist-info}/METADATA +338 -400
  340. warp_lang-1.1.0.dist-info/RECORD +352 -0
  341. warp/examples/assets/cube.usda +0 -42
  342. warp/examples/assets/sphere.usda +0 -56
  343. warp/examples/assets/torus.usda +0 -105
  344. warp_lang-1.0.1.dist-info/RECORD +0 -352
  345. {warp_lang-1.0.1.dist-info → warp_lang-1.1.0.dist-info}/WHEEL +0 -0
  346. {warp_lang-1.0.1.dist-info → warp_lang-1.1.0.dist-info}/top_level.txt +0 -0
@@ -1,1124 +1,1124 @@
1
- # Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
2
- # NVIDIA CORPORATION and its licensors retain all intellectual property
3
- # and proprietary rights in and to this software, related documentation
4
- # and any modifications thereto. Any use, reproduction, disclosure or
5
- # distribution of this software and related documentation without an express
6
- # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
-
8
- import math
9
- import unittest
10
-
11
- import numpy as np
12
-
13
- import warp as wp
14
- from warp.tests.unittest_utils import *
15
-
16
- wp.init()
17
-
18
- np_signed_int_types = [
19
- np.int8,
20
- np.int16,
21
- np.int32,
22
- np.int64,
23
- np.byte,
24
- ]
25
-
26
- np_unsigned_int_types = [
27
- np.uint8,
28
- np.uint16,
29
- np.uint32,
30
- np.uint64,
31
- np.ubyte,
32
- ]
33
-
34
- np_int_types = np_signed_int_types + np_unsigned_int_types
35
-
36
- np_float_types = [np.float16, np.float32, np.float64]
37
-
38
- np_scalar_types = np_int_types + np_float_types
39
-
40
-
41
- def randvals(rng, shape, dtype):
42
- if dtype in np_float_types:
43
- return rng.standard_normal(size=shape).astype(dtype)
44
- elif dtype in [np.int8, np.uint8, np.byte, np.ubyte]:
45
- return rng.integers(1, high=3, size=shape, dtype=dtype)
46
- return rng.integers(1, high=5, size=shape, dtype=dtype)
47
-
48
-
49
- kernel_cache = dict()
50
-
51
-
52
- def getkernel(func, suffix=""):
53
- key = func.__name__ + "_" + suffix
54
- if key not in kernel_cache:
55
- kernel_cache[key] = wp.Kernel(func=func, key=key)
56
- return kernel_cache[key]
57
-
58
-
59
- def get_select_kernel(dtype):
60
- def output_select_kernel_fn(
61
- input: wp.array(dtype=dtype),
62
- index: int,
63
- out: wp.array(dtype=dtype),
64
- ):
65
- out[0] = input[index]
66
-
67
- return getkernel(output_select_kernel_fn, suffix=dtype.__name__)
68
-
69
-
70
- def get_select_kernel2(dtype):
71
- def output_select_kernel2_fn(
72
- input: wp.array(dtype=dtype, ndim=2),
73
- index0: int,
74
- index1: int,
75
- out: wp.array(dtype=dtype),
76
- ):
77
- out[0] = input[index0, index1]
78
-
79
- return getkernel(output_select_kernel2_fn, suffix=dtype.__name__)
80
-
81
-
82
- def test_arrays(test, device, dtype):
83
- rng = np.random.default_rng(123)
84
-
85
- tol = {
86
- np.float16: 1.0e-3,
87
- np.float32: 1.0e-6,
88
- np.float64: 1.0e-8,
89
- }.get(dtype, 0)
90
-
91
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
92
- arr_np = randvals(rng, (10, 5), dtype)
93
- arr = wp.array(arr_np, dtype=wptype, requires_grad=True, device=device)
94
-
95
- assert_np_equal(arr.numpy(), arr_np, tol=tol)
96
-
97
-
98
- def test_unary_ops(test, device, dtype, register_kernels=False):
99
- rng = np.random.default_rng(123)
100
-
101
- tol = {
102
- np.float16: 5.0e-3,
103
- np.float32: 1.0e-6,
104
- np.float64: 1.0e-8,
105
- }.get(dtype, 0)
106
-
107
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
108
-
109
- def check_unary(
110
- inputs: wp.array(dtype=wptype, ndim=2),
111
- outputs: wp.array(dtype=wptype, ndim=2),
112
- ):
113
- for i in range(10):
114
- i0 = inputs[0, i]
115
- i1 = inputs[1, i]
116
- i2 = inputs[2, i]
117
- i3 = inputs[3, i]
118
- i4 = inputs[4, i]
119
-
120
- # multiply outputs by 2 so we've got something to backpropagate:
121
- outputs[0, i] = wptype(2.0) * (+i0)
122
- outputs[1, i] = wptype(2.0) * (-i1)
123
- outputs[2, i] = wptype(2.0) * wp.sign(i2)
124
- outputs[3, i] = wptype(2.0) * wp.abs(i3)
125
- outputs[4, i] = wptype(2.0) * wp.step(i4)
126
-
127
- kernel = getkernel(check_unary, suffix=dtype.__name__)
128
- output_select_kernel = get_select_kernel2(wptype)
129
-
130
- if register_kernels:
131
- return
132
-
133
- if dtype in np_float_types:
134
- inputs = wp.array(
135
- rng.standard_normal(size=(5, 10)).astype(dtype), dtype=wptype, requires_grad=True, device=device
136
- )
137
- else:
138
- inputs = wp.array(
139
- rng.integers(-2, high=3, size=(5, 10), dtype=dtype), dtype=wptype, requires_grad=True, device=device
140
- )
141
- outputs = wp.zeros_like(inputs)
142
-
143
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
144
- assert_np_equal(outputs.numpy()[0], 2 * inputs.numpy()[0], tol=tol)
145
- assert_np_equal(outputs.numpy()[1], -2 * inputs.numpy()[1], tol=tol)
146
- expected = 2 * np.sign(inputs.numpy()[2])
147
- expected[expected == 0] = 2
148
- assert_np_equal(outputs.numpy()[2], expected, tol=tol)
149
- assert_np_equal(outputs.numpy()[3], 2 * np.abs(inputs.numpy()[3]), tol=tol)
150
- assert_np_equal(outputs.numpy()[4], 2 * (1 - np.heaviside(inputs.numpy()[4], 1)), tol=tol)
151
-
152
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
153
- if dtype in np_float_types:
154
- for i in range(10):
155
- # grad of 2x:
156
- tape = wp.Tape()
157
- with tape:
158
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
159
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
160
-
161
- tape.backward(loss=out)
162
- expected_grads = np.zeros_like(inputs.numpy())
163
- expected_grads[0, i] = 2
164
- assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
165
- tape.zero()
166
-
167
- # grad of -2x:
168
- tape = wp.Tape()
169
- with tape:
170
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
171
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
172
-
173
- tape.backward(loss=out)
174
- expected_grads = np.zeros_like(inputs.numpy())
175
- expected_grads[1, i] = -2
176
- assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
177
- tape.zero()
178
-
179
- # grad of 2 * sign(x):
180
- tape = wp.Tape()
181
- with tape:
182
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
183
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 2, i], outputs=[out], device=device)
184
-
185
- tape.backward(loss=out)
186
- expected_grads = np.zeros_like(inputs.numpy())
187
- assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
188
- tape.zero()
189
-
190
- # grad of 2 * abs(x):
191
- tape = wp.Tape()
192
- with tape:
193
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
194
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 3, i], outputs=[out], device=device)
195
-
196
- tape.backward(loss=out)
197
- expected_grads = np.zeros_like(inputs.numpy())
198
- expected_grads[3, i] = 2 * np.sign(inputs.numpy()[3, i])
199
- assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
200
- tape.zero()
201
-
202
- # grad of 2 * step(x):
203
- tape = wp.Tape()
204
- with tape:
205
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
206
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 4, i], outputs=[out], device=device)
207
-
208
- tape.backward(loss=out)
209
- expected_grads = np.zeros_like(inputs.numpy())
210
- assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
211
- tape.zero()
212
-
213
-
214
- def test_nonzero(test, device, dtype, register_kernels=False):
215
- rng = np.random.default_rng(123)
216
-
217
- tol = {
218
- np.float16: 5.0e-3,
219
- np.float32: 1.0e-6,
220
- np.float64: 1.0e-8,
221
- }.get(dtype, 0)
222
-
223
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
224
-
225
- def check_nonzero(
226
- inputs: wp.array(dtype=wptype),
227
- outputs: wp.array(dtype=wptype),
228
- ):
229
- for i in range(10):
230
- i0 = inputs[i]
231
- outputs[i] = wp.nonzero(i0)
232
-
233
- kernel = getkernel(check_nonzero, suffix=dtype.__name__)
234
- output_select_kernel = get_select_kernel(wptype)
235
-
236
- if register_kernels:
237
- return
238
-
239
- inputs = wp.array(rng.integers(-2, high=3, size=10).astype(dtype), dtype=wptype, requires_grad=True, device=device)
240
- outputs = wp.zeros_like(inputs)
241
-
242
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
243
- assert_np_equal(outputs.numpy(), (inputs.numpy() != 0))
244
-
245
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
246
- if dtype in np_float_types:
247
- for i in range(10):
248
- # grad should just be zero:
249
- tape = wp.Tape()
250
- with tape:
251
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
252
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[out], device=device)
253
-
254
- tape.backward(loss=out)
255
- expected_grads = np.zeros_like(inputs.numpy())
256
- assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
257
- tape.zero()
258
-
259
-
260
- def test_binary_ops(test, device, dtype, register_kernels=False):
261
- rng = np.random.default_rng(123)
262
-
263
- tol = {
264
- np.float16: 5.0e-2,
265
- np.float32: 1.0e-6,
266
- np.float64: 1.0e-8,
267
- }.get(dtype, 0)
268
-
269
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
270
-
271
- def check_binary_ops(
272
- in1: wp.array(dtype=wptype, ndim=2),
273
- in2: wp.array(dtype=wptype, ndim=2),
274
- outputs: wp.array(dtype=wptype, ndim=2),
275
- ):
276
- for i in range(10):
277
- i0 = in1[0, i]
278
- i1 = in1[1, i]
279
- i2 = in1[2, i]
280
- i3 = in1[3, i]
281
- i4 = in1[4, i]
282
- i5 = in1[5, i]
283
- i6 = in1[6, i]
284
- i7 = in1[7, i]
285
-
286
- j0 = in2[0, i]
287
- j1 = in2[1, i]
288
- j2 = in2[2, i]
289
- j3 = in2[3, i]
290
- j4 = in2[4, i]
291
- j5 = in2[5, i]
292
- j6 = in2[6, i]
293
- j7 = in2[7, i]
294
-
295
- outputs[0, i] = wptype(2) * wp.mul(i0, j0)
296
- outputs[1, i] = wptype(2) * wp.div(i1, j1)
297
- outputs[2, i] = wptype(2) * wp.add(i2, j2)
298
- outputs[3, i] = wptype(2) * wp.sub(i3, j3)
299
- outputs[4, i] = wptype(2) * wp.mod(i4, j4)
300
- outputs[5, i] = wptype(2) * wp.min(i5, j5)
301
- outputs[6, i] = wptype(2) * wp.max(i6, j6)
302
- outputs[7, i] = wptype(2) * wp.floordiv(i7, j7)
303
-
304
- kernel = getkernel(check_binary_ops, suffix=dtype.__name__)
305
- output_select_kernel = get_select_kernel2(wptype)
306
-
307
- if register_kernels:
308
- return
309
-
310
- vals1 = randvals(rng, [8, 10], dtype)
311
- if dtype in [np_unsigned_int_types]:
312
- vals2 = vals1 + randvals(rng, [8, 10], dtype)
313
- else:
314
- vals2 = np.abs(randvals(rng, [8, 10], dtype))
315
-
316
- in1 = wp.array(vals1, dtype=wptype, requires_grad=True, device=device)
317
- in2 = wp.array(vals2, dtype=wptype, requires_grad=True, device=device)
318
-
319
- outputs = wp.zeros_like(in1)
320
-
321
- wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
322
-
323
- assert_np_equal(outputs.numpy()[0], 2 * in1.numpy()[0] * in2.numpy()[0], tol=tol)
324
- if dtype in np_float_types:
325
- assert_np_equal(outputs.numpy()[1], 2 * in1.numpy()[1] / (in2.numpy()[1]), tol=tol)
326
- else:
327
- assert_np_equal(outputs.numpy()[1], 2 * (in1.numpy()[1] // (in2.numpy()[1])), tol=tol)
328
- assert_np_equal(outputs.numpy()[2], 2 * (in1.numpy()[2] + (in2.numpy()[2])), tol=tol)
329
- assert_np_equal(outputs.numpy()[3], 2 * (in1.numpy()[3] - (in2.numpy()[3])), tol=tol)
330
-
331
- # ...so this is actually the desired behaviour right? Looks like wp.mod doesn't behave like
332
- # python's % operator or np.mod()...
333
- assert_np_equal(
334
- outputs.numpy()[4],
335
- 2
336
- * (
337
- (in1.numpy()[4])
338
- - (in2.numpy()[4]) * np.sign(in1.numpy()[4]) * np.floor(np.abs(in1.numpy()[4]) / (in2.numpy()[4]))
339
- ),
340
- tol=tol,
341
- )
342
-
343
- assert_np_equal(outputs.numpy()[5], 2 * np.minimum(in1.numpy()[5], in2.numpy()[5]), tol=tol)
344
- assert_np_equal(outputs.numpy()[6], 2 * np.maximum(in1.numpy()[6], in2.numpy()[6]), tol=tol)
345
- assert_np_equal(outputs.numpy()[7], 2 * np.floor_divide(in1.numpy()[7], in2.numpy()[7]), tol=tol)
346
-
347
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
348
- if dtype in np_float_types:
349
- for i in range(10):
350
- # multiplication:
351
- tape = wp.Tape()
352
- with tape:
353
- wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
354
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
355
-
356
- tape.backward(loss=out)
357
- expected = np.zeros_like(in1.numpy())
358
- expected[0, i] = 2.0 * in2.numpy()[0, i]
359
- assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
360
- expected[0, i] = 2.0 * in1.numpy()[0, i]
361
- assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
362
- tape.zero()
363
-
364
- # division:
365
- tape = wp.Tape()
366
- with tape:
367
- wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
368
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
369
-
370
- tape.backward(loss=out)
371
- expected = np.zeros_like(in1.numpy())
372
- expected[1, i] = 2.0 / (in2.numpy()[1, i])
373
- assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
374
- # y = x1/x2
375
- # dy/dx2 = -x1/x2^2
376
- expected[1, i] = (-2.0) * (in1.numpy()[1, i] / (in2.numpy()[1, i] ** 2))
377
- assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
378
- tape.zero()
379
-
380
- # addition:
381
- tape = wp.Tape()
382
- with tape:
383
- wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
384
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 2, i], outputs=[out], device=device)
385
-
386
- tape.backward(loss=out)
387
- expected = np.zeros_like(in1.numpy())
388
- expected[2, i] = 2.0
389
- assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
390
- expected[2, i] = 2.0
391
- assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
392
- tape.zero()
393
-
394
- # subtraction:
395
- tape = wp.Tape()
396
- with tape:
397
- wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
398
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 3, i], outputs=[out], device=device)
399
-
400
- tape.backward(loss=out)
401
- expected = np.zeros_like(in1.numpy())
402
- expected[3, i] = 2.0
403
- assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
404
- expected[3, i] = -2.0
405
- assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
406
- tape.zero()
407
-
408
- # modulus. unless at discontinuities,
409
- # d/dx1( x1 % x2 ) == 1
410
- # d/dx2( x1 % x2 ) == 0
411
- tape = wp.Tape()
412
- with tape:
413
- wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
414
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 4, i], outputs=[out], device=device)
415
-
416
- tape.backward(loss=out)
417
- expected = np.zeros_like(in1.numpy())
418
- expected[4, i] = 2.0
419
- assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
420
- expected[4, i] = 0.0
421
- assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
422
- tape.zero()
423
-
424
- # min
425
- tape = wp.Tape()
426
- with tape:
427
- wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
428
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 5, i], outputs=[out], device=device)
429
-
430
- tape.backward(loss=out)
431
- expected = np.zeros_like(in1.numpy())
432
- expected[5, i] = 2.0 if (in1.numpy()[5, i] < in2.numpy()[5, i]) else 0.0
433
- assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
434
- expected[5, i] = 2.0 if (in2.numpy()[5, i] < in1.numpy()[5, i]) else 0.0
435
- assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
436
- tape.zero()
437
-
438
- # max
439
- tape = wp.Tape()
440
- with tape:
441
- wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
442
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 6, i], outputs=[out], device=device)
443
-
444
- tape.backward(loss=out)
445
- expected = np.zeros_like(in1.numpy())
446
- expected[6, i] = 2.0 if (in1.numpy()[6, i] > in2.numpy()[6, i]) else 0.0
447
- assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
448
- expected[6, i] = 2.0 if (in2.numpy()[6, i] > in1.numpy()[6, i]) else 0.0
449
- assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
450
- tape.zero()
451
-
452
- # floor_divide. Returns integers so gradient is zero
453
- tape = wp.Tape()
454
- with tape:
455
- wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
456
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 7, i], outputs=[out], device=device)
457
-
458
- tape.backward(loss=out)
459
- expected = np.zeros_like(in1.numpy())
460
- assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
461
- assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
462
- tape.zero()
463
-
464
-
465
- def test_special_funcs(test, device, dtype, register_kernels=False):
466
- rng = np.random.default_rng(123)
467
-
468
- tol = {
469
- np.float16: 1.0e-2,
470
- np.float32: 1.0e-6,
471
- np.float64: 1.0e-8,
472
- }.get(dtype, 0)
473
-
474
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
475
-
476
- def check_special_funcs(
477
- inputs: wp.array(dtype=wptype, ndim=2),
478
- outputs: wp.array(dtype=wptype, ndim=2),
479
- ):
480
- # multiply outputs by 2 so we've got something to backpropagate:
481
- for i in range(10):
482
- outputs[0, i] = wptype(2) * wp.log(inputs[0, i])
483
- outputs[1, i] = wptype(2) * wp.log2(inputs[1, i])
484
- outputs[2, i] = wptype(2) * wp.log10(inputs[2, i])
485
- outputs[3, i] = wptype(2) * wp.exp(inputs[3, i])
486
- outputs[4, i] = wptype(2) * wp.atan(inputs[4, i])
487
- outputs[5, i] = wptype(2) * wp.sin(inputs[5, i])
488
- outputs[6, i] = wptype(2) * wp.cos(inputs[6, i])
489
- outputs[7, i] = wptype(2) * wp.sqrt(inputs[7, i])
490
- outputs[8, i] = wptype(2) * wp.tan(inputs[8, i])
491
- outputs[9, i] = wptype(2) * wp.sinh(inputs[9, i])
492
- outputs[10, i] = wptype(2) * wp.cosh(inputs[10, i])
493
- outputs[11, i] = wptype(2) * wp.tanh(inputs[11, i])
494
- outputs[12, i] = wptype(2) * wp.acos(inputs[12, i])
495
- outputs[13, i] = wptype(2) * wp.asin(inputs[13, i])
496
- outputs[14, i] = wptype(2) * wp.cbrt(inputs[14, i])
497
-
498
- kernel = getkernel(check_special_funcs, suffix=dtype.__name__)
499
- output_select_kernel = get_select_kernel2(wptype)
500
-
501
- if register_kernels:
502
- return
503
-
504
- invals = rng.normal(size=(15, 10)).astype(dtype)
505
- invals[[0, 1, 2, 7, 14]] = 0.1 + np.abs(invals[[0, 1, 2, 7, 14]])
506
- invals[12] = np.clip(invals[12], -0.9, 0.9)
507
- invals[13] = np.clip(invals[13], -0.9, 0.9)
508
- inputs = wp.array(invals, dtype=wptype, requires_grad=True, device=device)
509
- outputs = wp.zeros_like(inputs)
510
-
511
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
512
-
513
- assert_np_equal(outputs.numpy()[0], 2 * np.log(inputs.numpy()[0]), tol=tol)
514
- assert_np_equal(outputs.numpy()[1], 2 * np.log2(inputs.numpy()[1]), tol=tol)
515
- assert_np_equal(outputs.numpy()[2], 2 * np.log10(inputs.numpy()[2]), tol=tol)
516
- assert_np_equal(outputs.numpy()[3], 2 * np.exp(inputs.numpy()[3]), tol=tol)
517
- assert_np_equal(outputs.numpy()[4], 2 * np.arctan(inputs.numpy()[4]), tol=tol)
518
- assert_np_equal(outputs.numpy()[5], 2 * np.sin(inputs.numpy()[5]), tol=tol)
519
- assert_np_equal(outputs.numpy()[6], 2 * np.cos(inputs.numpy()[6]), tol=tol)
520
- assert_np_equal(outputs.numpy()[7], 2 * np.sqrt(inputs.numpy()[7]), tol=tol)
521
- assert_np_equal(outputs.numpy()[8], 2 * np.tan(inputs.numpy()[8]), tol=tol)
522
- assert_np_equal(outputs.numpy()[9], 2 * np.sinh(inputs.numpy()[9]), tol=tol)
523
- assert_np_equal(outputs.numpy()[10], 2 * np.cosh(inputs.numpy()[10]), tol=tol)
524
- assert_np_equal(outputs.numpy()[11], 2 * np.tanh(inputs.numpy()[11]), tol=tol)
525
- assert_np_equal(outputs.numpy()[12], 2 * np.arccos(inputs.numpy()[12]), tol=tol)
526
- assert_np_equal(outputs.numpy()[13], 2 * np.arcsin(inputs.numpy()[13]), tol=tol)
527
- assert_np_equal(outputs.numpy()[14], 2 * np.cbrt(inputs.numpy()[14]), tol=tol)
528
-
529
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
530
- if dtype in np_float_types:
531
- for i in range(10):
532
- # log:
533
- tape = wp.Tape()
534
- with tape:
535
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
536
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
537
-
538
- tape.backward(loss=out)
539
- expected = np.zeros_like(inputs.numpy())
540
- expected[0, i] = 2.0 / inputs.numpy()[0, i]
541
- assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
542
- tape.zero()
543
-
544
- # log2:
545
- tape = wp.Tape()
546
- with tape:
547
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
548
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
549
-
550
- tape.backward(loss=out)
551
- expected = np.zeros_like(inputs.numpy())
552
- expected[1, i] = 2.0 / (inputs.numpy()[1, i] * np.log(2.0))
553
- assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
554
- tape.zero()
555
-
556
- # log10:
557
- tape = wp.Tape()
558
- with tape:
559
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
560
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 2, i], outputs=[out], device=device)
561
-
562
- tape.backward(loss=out)
563
- expected = np.zeros_like(inputs.numpy())
564
- expected[2, i] = 2.0 / (inputs.numpy()[2, i] * np.log(10.0))
565
- assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
566
- tape.zero()
567
-
568
- # exp:
569
- tape = wp.Tape()
570
- with tape:
571
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
572
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 3, i], outputs=[out], device=device)
573
-
574
- tape.backward(loss=out)
575
- expected = np.zeros_like(inputs.numpy())
576
- expected[3, i] = outputs.numpy()[3, i]
577
- assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
578
- tape.zero()
579
-
580
- # arctan:
581
- # looks like the autodiff formula in warp was wrong? Was (1 + x^2) rather than
582
- # 1/(1 + x^2)
583
- tape = wp.Tape()
584
- with tape:
585
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
586
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 4, i], outputs=[out], device=device)
587
-
588
- tape.backward(loss=out)
589
- expected = np.zeros_like(inputs.numpy())
590
- expected[4, i] = 2.0 / (inputs.numpy()[4, i] ** 2 + 1)
591
- assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
592
- tape.zero()
593
-
594
- # sin:
595
- tape = wp.Tape()
596
- with tape:
597
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
598
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 5, i], outputs=[out], device=device)
599
-
600
- tape.backward(loss=out)
601
- expected = np.zeros_like(inputs.numpy())
602
- expected[5, i] = np.cos(inputs.numpy()[5, i]) * 2
603
- assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
604
- tape.zero()
605
-
606
- # cos:
607
- tape = wp.Tape()
608
- with tape:
609
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
610
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 6, i], outputs=[out], device=device)
611
-
612
- tape.backward(loss=out)
613
- expected = np.zeros_like(inputs.numpy())
614
- expected[6, i] = -np.sin(inputs.numpy()[6, i]) * 2.0
615
- assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
616
- tape.zero()
617
-
618
- # sqrt:
619
- tape = wp.Tape()
620
- with tape:
621
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
622
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 7, i], outputs=[out], device=device)
623
-
624
- tape.backward(loss=out)
625
- expected = np.zeros_like(inputs.numpy())
626
- expected[7, i] = 1.0 / (np.sqrt(inputs.numpy()[7, i]))
627
- assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
628
- tape.zero()
629
-
630
- # tan:
631
- # looks like there was a bug in autodiff formula here too - gradient was zero if cos(x) > 0
632
- # (should have been "if(cosx != 0)")
633
- tape = wp.Tape()
634
- with tape:
635
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
636
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 8, i], outputs=[out], device=device)
637
-
638
- tape.backward(loss=out)
639
- expected = np.zeros_like(inputs.numpy())
640
- expected[8, i] = 2.0 / (np.cos(inputs.numpy()[8, i]) ** 2)
641
- assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=200 * tol)
642
- tape.zero()
643
-
644
- # sinh:
645
- tape = wp.Tape()
646
- with tape:
647
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
648
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 9, i], outputs=[out], device=device)
649
-
650
- tape.backward(loss=out)
651
- expected = np.zeros_like(inputs.numpy())
652
- expected[9, i] = 2.0 * np.cosh(inputs.numpy()[9, i])
653
- assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
654
- tape.zero()
655
-
656
- # cosh:
657
- tape = wp.Tape()
658
- with tape:
659
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
660
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 10, i], outputs=[out], device=device)
661
-
662
- tape.backward(loss=out)
663
- expected = np.zeros_like(inputs.numpy())
664
- expected[10, i] = 2.0 * np.sinh(inputs.numpy()[10, i])
665
- assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
666
- tape.zero()
667
-
668
- # tanh:
669
- tape = wp.Tape()
670
- with tape:
671
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
672
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 11, i], outputs=[out], device=device)
673
-
674
- tape.backward(loss=out)
675
- expected = np.zeros_like(inputs.numpy())
676
- expected[11, i] = 2.0 / (np.cosh(inputs.numpy()[11, i]) ** 2)
677
- assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
678
- tape.zero()
679
-
680
- # arccos:
681
- tape = wp.Tape()
682
- with tape:
683
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
684
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 12, i], outputs=[out], device=device)
685
-
686
- tape.backward(loss=out)
687
- expected = np.zeros_like(inputs.numpy())
688
- expected[12, i] = -2.0 / np.sqrt(1 - inputs.numpy()[12, i] ** 2)
689
- assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
690
- tape.zero()
691
-
692
- # arcsin:
693
- tape = wp.Tape()
694
- with tape:
695
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
696
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 13, i], outputs=[out], device=device)
697
-
698
- tape.backward(loss=out)
699
- expected = np.zeros_like(inputs.numpy())
700
- expected[13, i] = 2.0 / np.sqrt(1 - inputs.numpy()[13, i] ** 2)
701
- assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=6 * tol)
702
- tape.zero()
703
-
704
- # cbrt:
705
- tape = wp.Tape()
706
- with tape:
707
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
708
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 14, i], outputs=[out], device=device)
709
-
710
- tape.backward(loss=out)
711
- expected = np.zeros_like(inputs.numpy())
712
- cbrt = np.cbrt(inputs.numpy()[14, i], dtype=np.dtype(dtype))
713
- expected[14, i] = (2.0 / 3.0) * (1.0 / (cbrt * cbrt))
714
- assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
715
- tape.zero()
716
-
717
-
718
- def test_special_funcs_2arg(test, device, dtype, register_kernels=False):
719
- rng = np.random.default_rng(123)
720
-
721
- tol = {
722
- np.float16: 1.0e-2,
723
- np.float32: 1.0e-6,
724
- np.float64: 1.0e-8,
725
- }.get(dtype, 0)
726
-
727
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
728
-
729
- def check_special_funcs_2arg(
730
- in1: wp.array(dtype=wptype, ndim=2),
731
- in2: wp.array(dtype=wptype, ndim=2),
732
- outputs: wp.array(dtype=wptype, ndim=2),
733
- ):
734
- # multiply outputs by 2 so we've got something to backpropagate:
735
- for i in range(10):
736
- outputs[0, i] = wptype(2) * wp.pow(in1[0, i], in2[0, i])
737
- outputs[1, i] = wptype(2) * wp.atan2(in1[1, i], in2[1, i])
738
-
739
- kernel = getkernel(check_special_funcs_2arg, suffix=dtype.__name__)
740
- output_select_kernel = get_select_kernel2(wptype)
741
-
742
- if register_kernels:
743
- return
744
-
745
- in1 = wp.array(np.abs(randvals(rng, [2, 10], dtype)), dtype=wptype, requires_grad=True, device=device)
746
- in2 = wp.array(randvals(rng, [2, 10], dtype), dtype=wptype, requires_grad=True, device=device)
747
- outputs = wp.zeros_like(in1)
748
-
749
- wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
750
-
751
- assert_np_equal(outputs.numpy()[0], 2.0 * np.power(in1.numpy()[0], in2.numpy()[0]), tol=tol)
752
- assert_np_equal(outputs.numpy()[1], 2.0 * np.arctan2(in1.numpy()[1], in2.numpy()[1]), tol=tol)
753
-
754
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
755
- if dtype in np_float_types:
756
- for i in range(10):
757
- # pow:
758
- tape = wp.Tape()
759
- with tape:
760
- wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
761
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
762
- tape.backward(loss=out)
763
- expected = np.zeros_like(in1.numpy())
764
- expected[0, i] = 2.0 * in2.numpy()[0, i] * np.power(in1.numpy()[0, i], in2.numpy()[0, i] - 1)
765
- assert_np_equal(tape.gradients[in1].numpy(), expected, tol=5 * tol)
766
- expected[0, i] = 2.0 * np.power(in1.numpy()[0, i], in2.numpy()[0, i]) * np.log(in1.numpy()[0, i])
767
- assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
768
- tape.zero()
769
-
770
- # atan2:
771
- tape = wp.Tape()
772
- with tape:
773
- wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
774
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
775
-
776
- tape.backward(loss=out)
777
- expected = np.zeros_like(in1.numpy())
778
- expected[1, i] = 2.0 * in2.numpy()[1, i] / (in1.numpy()[1, i] ** 2 + in2.numpy()[1, i] ** 2)
779
- assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
780
- expected[1, i] = -2.0 * in1.numpy()[1, i] / (in1.numpy()[1, i] ** 2 + in2.numpy()[1, i] ** 2)
781
- assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
782
- tape.zero()
783
-
784
-
785
- def test_float_to_int(test, device, dtype, register_kernels=False):
786
- rng = np.random.default_rng(123)
787
-
788
- tol = {
789
- np.float16: 5.0e-3,
790
- np.float32: 1.0e-6,
791
- np.float64: 1.0e-8,
792
- }.get(dtype, 0)
793
-
794
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
795
-
796
- def check_float_to_int(
797
- inputs: wp.array(dtype=wptype, ndim=2),
798
- outputs: wp.array(dtype=wptype, ndim=2),
799
- ):
800
- for i in range(10):
801
- outputs[0, i] = wp.round(inputs[0, i])
802
- outputs[1, i] = wp.rint(inputs[1, i])
803
- outputs[2, i] = wp.trunc(inputs[2, i])
804
- outputs[3, i] = wp.floor(inputs[3, i])
805
- outputs[4, i] = wp.ceil(inputs[4, i])
806
- outputs[5, i] = wp.frac(inputs[5, i])
807
-
808
- kernel = getkernel(check_float_to_int, suffix=dtype.__name__)
809
- output_select_kernel = get_select_kernel2(wptype)
810
-
811
- if register_kernels:
812
- return
813
-
814
- inputs = wp.array(rng.standard_normal(size=(6, 10)).astype(dtype), dtype=wptype, requires_grad=True, device=device)
815
- outputs = wp.zeros_like(inputs)
816
-
817
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
818
-
819
- assert_np_equal(outputs.numpy()[0], np.round(inputs.numpy()[0]))
820
- assert_np_equal(outputs.numpy()[1], np.rint(inputs.numpy()[1]))
821
- assert_np_equal(outputs.numpy()[2], np.trunc(inputs.numpy()[2]))
822
- assert_np_equal(outputs.numpy()[3], np.floor(inputs.numpy()[3]))
823
- assert_np_equal(outputs.numpy()[4], np.ceil(inputs.numpy()[4]))
824
- assert_np_equal(outputs.numpy()[5], np.modf(inputs.numpy()[5])[0])
825
-
826
- # all the gradients should be zero as these functions are piecewise constant:
827
-
828
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
829
- for i in range(10):
830
- for j in range(5):
831
- tape = wp.Tape()
832
- with tape:
833
- wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
834
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, j, i], outputs=[out], device=device)
835
-
836
- tape.backward(loss=out)
837
- assert_np_equal(tape.gradients[inputs].numpy(), np.zeros_like(inputs.numpy()), tol=tol)
838
- tape.zero()
839
-
840
-
841
- def test_infinity(test, device, dtype, register_kernels=False):
842
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
843
-
844
- def check_infinity(
845
- outputs: wp.array(dtype=wptype),
846
- ):
847
- outputs[0] = wptype(wp.inf)
848
- outputs[1] = wptype(-wp.inf)
849
- outputs[2] = wptype(2.0 * wp.inf)
850
- outputs[3] = wptype(-2.0 * wp.inf)
851
- outputs[4] = wptype(2.0 / 0.0)
852
- outputs[5] = wptype(-2.0 / 0.0)
853
-
854
- kernel = getkernel(check_infinity, suffix=dtype.__name__)
855
-
856
- if register_kernels:
857
- return
858
-
859
- outputs = wp.zeros(6, dtype=wptype, device=device)
860
-
861
- wp.launch(kernel, dim=1, inputs=[], outputs=[outputs], device=device)
862
-
863
- test.assertEqual(outputs.numpy()[0], math.inf)
864
- test.assertEqual(outputs.numpy()[1], -math.inf)
865
- test.assertEqual(outputs.numpy()[2], math.inf)
866
- test.assertEqual(outputs.numpy()[3], -math.inf)
867
- test.assertEqual(outputs.numpy()[4], math.inf)
868
- test.assertEqual(outputs.numpy()[5], -math.inf)
869
-
870
-
871
- def test_interp(test, device, dtype, register_kernels=False):
872
- rng = np.random.default_rng(123)
873
-
874
- tol = {
875
- np.float16: 1.0e-2,
876
- np.float32: 5.0e-6,
877
- np.float64: 1.0e-8,
878
- }.get(dtype, 0)
879
-
880
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
881
-
882
- def check_interp(
883
- in1: wp.array(dtype=wptype, ndim=2),
884
- in2: wp.array(dtype=wptype, ndim=2),
885
- in3: wp.array(dtype=wptype, ndim=2),
886
- outputs: wp.array(dtype=wptype, ndim=2),
887
- ):
888
- # multiply outputs by 2 so we've got something to backpropagate:
889
- for i in range(10):
890
- outputs[0, i] = wptype(2) * wp.smoothstep(in1[0, i], in2[0, i], in3[0, i])
891
- outputs[1, i] = wptype(2) * wp.lerp(in1[1, i], in2[1, i], in3[1, i])
892
-
893
- kernel = getkernel(check_interp, suffix=dtype.__name__)
894
- output_select_kernel = get_select_kernel2(wptype)
895
-
896
- if register_kernels:
897
- return
898
-
899
- e0 = randvals(rng, [2, 10], dtype)
900
- e1 = e0 + randvals(rng, [2, 10], dtype) + 0.1
901
- in1 = wp.array(e0, dtype=wptype, requires_grad=True, device=device)
902
- in2 = wp.array(e1, dtype=wptype, requires_grad=True, device=device)
903
- in3 = wp.array(randvals(rng, [2, 10], dtype), dtype=wptype, requires_grad=True, device=device)
904
-
905
- outputs = wp.zeros_like(in1)
906
-
907
- wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
908
-
909
- edge0 = in1.numpy()[0]
910
- edge1 = in2.numpy()[0]
911
- t_smoothstep = in3.numpy()[0]
912
- x = np.clip((t_smoothstep - edge0) / (edge1 - edge0), 0, 1)
913
- smoothstep_expected = 2.0 * x * x * (3 - 2 * x)
914
-
915
- assert_np_equal(outputs.numpy()[0], smoothstep_expected, tol=tol)
916
-
917
- a = in1.numpy()[1]
918
- b = in2.numpy()[1]
919
- t = in3.numpy()[1]
920
- assert_np_equal(outputs.numpy()[1], 2.0 * (a * (1 - t) + b * t), tol=tol)
921
-
922
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
923
- if dtype in np_float_types:
924
- for i in range(10):
925
- tape = wp.Tape()
926
- with tape:
927
- wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
928
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
929
- tape.backward(loss=out)
930
-
931
- # e0 = in1
932
- # e1 = in2
933
- # t = in3
934
-
935
- # x = clamp((t - e0) / (e1 - e0), 0,1)
936
- # dx/dt = 1 / (e1 - e0) if e0 < t < e1 else 0
937
-
938
- # y = x * x * (3 - 2 * x)
939
-
940
- # y = 3 * x * x - 2 * x * x * x
941
- # dy/dx = 6 * ( x - x^2 )
942
- dydx = 6 * x * (1 - x)
943
-
944
- # dy/in1 = dy/dx dx/de0 de0/din1
945
- dxde0 = (t_smoothstep - edge1) / ((edge1 - edge0) ** 2)
946
- dxde0[x == 0] = 0
947
- dxde0[x == 1] = 0
948
-
949
- expected_grads = np.zeros_like(in1.numpy())
950
- expected_grads[0, i] = 2.0 * dydx[i] * dxde0[i]
951
- assert_np_equal(tape.gradients[in1].numpy(), expected_grads, tol=tol)
952
-
953
- # dy/in2 = dy/dx dx/de1 de1/din2
954
- dxde1 = (edge0 - t_smoothstep) / ((edge1 - edge0) ** 2)
955
- dxde1[x == 0] = 0
956
- dxde1[x == 1] = 0
957
-
958
- expected_grads = np.zeros_like(in1.numpy())
959
- expected_grads[0, i] = 2.0 * dydx[i] * dxde1[i]
960
- assert_np_equal(tape.gradients[in2].numpy(), expected_grads, tol=tol)
961
-
962
- # dy/in3 = dy/dx dx/dt dt/din3
963
- dxdt = 1.0 / (edge1 - edge0)
964
- dxdt[x == 0] = 0
965
- dxdt[x == 1] = 0
966
-
967
- expected_grads = np.zeros_like(in1.numpy())
968
- expected_grads[0, i] = 2.0 * dydx[i] * dxdt[i]
969
- assert_np_equal(tape.gradients[in3].numpy(), expected_grads, tol=tol)
970
- tape.zero()
971
-
972
- tape = wp.Tape()
973
- with tape:
974
- wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
975
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
976
- tape.backward(loss=out)
977
-
978
- # y = a*(1-t) + b*t
979
- # a = in1
980
- # b = in2
981
- # t = in3
982
-
983
- # y = in1*( 1 - in3 ) + in2*in3
984
-
985
- # dy/din1 = (1-in3)
986
- expected_grads = np.zeros_like(in1.numpy())
987
- expected_grads[1, i] = 2.0 * (1 - in3.numpy()[1, i])
988
- assert_np_equal(tape.gradients[in1].numpy(), expected_grads, tol=tol)
989
-
990
- # dy/din2 = in3
991
- expected_grads = np.zeros_like(in1.numpy())
992
- expected_grads[1, i] = 2.0 * in3.numpy()[1, i]
993
- assert_np_equal(tape.gradients[in2].numpy(), expected_grads, tol=tol)
994
-
995
- # dy/din3 = 8*in2 - 1.5*4*in1
996
- expected_grads = np.zeros_like(in1.numpy())
997
- expected_grads[1, i] = 2.0 * (in2.numpy()[1, i] - in1.numpy()[1, i])
998
- assert_np_equal(tape.gradients[in3].numpy(), expected_grads, tol=tol)
999
- tape.zero()
1000
-
1001
-
1002
- def test_clamp(test, device, dtype, register_kernels=False):
-     rng = np.random.default_rng(123)
-
-     tol = {
-         np.float16: 5.0e-3,
-         np.float32: 1.0e-6,
-         np.float64: 1.0e-6,
-     }.get(dtype, 0)
-
-     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
-
-     def check_clamp(
-         in1: wp.array(dtype=wptype),
-         in2: wp.array(dtype=wptype),
-         in3: wp.array(dtype=wptype),
-         outputs: wp.array(dtype=wptype),
-     ):
-         for i in range(100):
-             # multiply output by 2 so we've got something to backpropagate:
-             outputs[i] = wptype(2) * wp.clamp(in1[i], in2[i], in3[i])
-
-     kernel = getkernel(check_clamp, suffix=dtype.__name__)
-     output_select_kernel = get_select_kernel(wptype)
-
-     if register_kernels:
-         return
-
-     in1 = wp.array(randvals(rng, [100], dtype), dtype=wptype, requires_grad=True, device=device)
-     starts = randvals(rng, [100], dtype)
-     diffs = np.abs(randvals(rng, [100], dtype))
-     in2 = wp.array(starts, dtype=wptype, requires_grad=True, device=device)
-     in3 = wp.array(starts + diffs, dtype=wptype, requires_grad=True, device=device)
-     outputs = wp.zeros_like(in1)
-
-     wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
-
-     assert_np_equal(2 * np.clip(in1.numpy(), in2.numpy(), in3.numpy()), outputs.numpy(), tol=tol)
-
-     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
-     if dtype in np_float_types:
-         for i in range(100):
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             t = in1.numpy()[i]
-             lower = in2.numpy()[i]
-             upper = in3.numpy()[i]
-             expected = np.zeros_like(in1.numpy())
-             if t < lower:
-                 expected[i] = 2.0
-                 assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-                 expected[i] = 0.0
-                 assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
-                 assert_np_equal(tape.gradients[in3].numpy(), expected, tol=tol)
-             elif t > upper:
-                 expected[i] = 2.0
-                 assert_np_equal(tape.gradients[in3].numpy(), expected, tol=tol)
-                 expected[i] = 0.0
-                 assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
-                 assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-             else:
-                 expected[i] = 2.0
-                 assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
-                 expected[i] = 0.0
-                 assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-                 assert_np_equal(tape.gradients[in3].numpy(), expected, tol=tol)
-
-             tape.zero()
-
-
- devices = get_test_devices()
-
-
- class TestArithmetic(unittest.TestCase):
-     pass
-
-
- # these unary ops only make sense for signed values:
- for dtype in np_signed_int_types + np_float_types:
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_unary_ops_{dtype.__name__}", test_unary_ops, devices=devices, dtype=dtype
-     )
-
- for dtype in np_float_types:
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_special_funcs_{dtype.__name__}", test_special_funcs, devices=devices, dtype=dtype
-     )
-     add_function_test_register_kernel(
-         TestArithmetic,
-         f"test_special_funcs_2arg_{dtype.__name__}",
-         test_special_funcs_2arg,
-         devices=devices,
-         dtype=dtype,
-     )
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_interp_{dtype.__name__}", test_interp, devices=devices, dtype=dtype
-     )
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_float_to_int_{dtype.__name__}", test_float_to_int, devices=devices, dtype=dtype
-     )
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_infinity_{dtype.__name__}", test_infinity, devices=devices, dtype=dtype
-     )
-
- for dtype in np_scalar_types:
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_clamp_{dtype.__name__}", test_clamp, devices=devices, dtype=dtype
-     )
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_nonzero_{dtype.__name__}", test_nonzero, devices=devices, dtype=dtype
-     )
-     add_function_test(TestArithmetic, f"test_arrays_{dtype.__name__}", test_arrays, devices=devices, dtype=dtype)
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_binary_ops_{dtype.__name__}", test_binary_ops, devices=devices, dtype=dtype
-     )
-
-
- if __name__ == "__main__":
-     wp.build.clear_kernel_cache()
-     unittest.main(verbosity=2, failfast=False)
+ # Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
+ # and proprietary rights in and to this software, related documentation
+ # and any modifications thereto. Any use, reproduction, disclosure or
+ # distribution of this software and related documentation without an express
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+ import math
+ import unittest
+
+ import numpy as np
+
+ import warp as wp
+ from warp.tests.unittest_utils import *
+
+ wp.init()
+
+ np_signed_int_types = [
+     np.int8,
+     np.int16,
+     np.int32,
+     np.int64,
+     np.byte,
+ ]
+
+ np_unsigned_int_types = [
+     np.uint8,
+     np.uint16,
+     np.uint32,
+     np.uint64,
+     np.ubyte,
+ ]
+
+ np_int_types = np_signed_int_types + np_unsigned_int_types
+
+ np_float_types = [np.float16, np.float32, np.float64]
+
+ np_scalar_types = np_int_types + np_float_types
+
+
+ def randvals(rng, shape, dtype):
+     if dtype in np_float_types:
+         return rng.standard_normal(size=shape).astype(dtype)
+     elif dtype in [np.int8, np.uint8, np.byte, np.ubyte]:
+         return rng.integers(1, high=3, size=shape, dtype=dtype)
+     return rng.integers(1, high=5, size=shape, dtype=dtype)
+
+
+ kernel_cache = {}
+
+
+ def getkernel(func, suffix=""):
+     key = func.__name__ + "_" + suffix
+     if key not in kernel_cache:
+         kernel_cache[key] = wp.Kernel(func=func, key=key)
+     return kernel_cache[key]
+
+
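These tests are generated once per scalar dtype, so the same Python function body is turned into a Warp kernel many times over; getkernel() memoizes on the function name plus a dtype suffix so each variant is built with wp.Kernel exactly once. A minimal illustrative sketch of the pattern (the scale kernel and the "float32" suffix are hypothetical, not part of the diff):

```python
import warp as wp

wp.init()

kernel_cache = {}

def getkernel(func, suffix=""):
    # one wp.Kernel per (function, dtype-suffix) pair
    key = func.__name__ + "_" + suffix
    if key not in kernel_cache:
        kernel_cache[key] = wp.Kernel(func=func, key=key)
    return kernel_cache[key]

def scale(x: wp.array(dtype=wp.float32), y: wp.array(dtype=wp.float32)):
    tid = wp.tid()
    y[tid] = 2.0 * x[tid]

k1 = getkernel(scale, suffix="float32")
k2 = getkernel(scale, suffix="float32")
assert k1 is k2  # cached: the same kernel object is returned both times
```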
+ def get_select_kernel(dtype):
+     def output_select_kernel_fn(
+         input: wp.array(dtype=dtype),
+         index: int,
+         out: wp.array(dtype=dtype),
+     ):
+         out[0] = input[index]
+
+     return getkernel(output_select_kernel_fn, suffix=dtype.__name__)
+
+
+ def get_select_kernel2(dtype):
+     def output_select_kernel2_fn(
+         input: wp.array(dtype=dtype, ndim=2),
+         index0: int,
+         index1: int,
+         out: wp.array(dtype=dtype),
+     ):
+         out[0] = input[index0, index1]
+
+     return getkernel(output_select_kernel2_fn, suffix=dtype.__name__)
+
+
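The select kernels exist because tape.backward() wants a scalar loss: each test copies one element of the kernel output into a one-element array and differentiates that, element by element. A self-contained sketch of the recurring launch/select/backward pattern, using assumed toy kernels square and select rather than the test's own:

```python
import numpy as np
import warp as wp

wp.init()

@wp.kernel
def square(x: wp.array(dtype=float), y: wp.array(dtype=float)):
    tid = wp.tid()
    y[tid] = x[tid] * x[tid]

@wp.kernel
def select(y: wp.array(dtype=float), i: int, out: wp.array(dtype=float)):
    out[0] = y[i]

x = wp.array(np.arange(1.0, 5.0, dtype=np.float32), dtype=float, requires_grad=True)
y = wp.zeros_like(x)
out = wp.zeros(1, dtype=float, requires_grad=True)

tape = wp.Tape()
with tape:
    wp.launch(square, dim=4, inputs=[x], outputs=[y])
    wp.launch(select, dim=1, inputs=[y, 2], outputs=[out])  # scalar loss = y[2]
tape.backward(loss=out)

# only x[2] receives a gradient: d(x^2)/dx = 2x = 6 at x = 3
print(tape.gradients[x].numpy())  # ~[0. 0. 6. 0.]
```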
+ def test_arrays(test, device, dtype):
+     rng = np.random.default_rng(123)
+
+     tol = {
+         np.float16: 1.0e-3,
+         np.float32: 1.0e-6,
+         np.float64: 1.0e-8,
+     }.get(dtype, 0)
+
+     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+     arr_np = randvals(rng, (10, 5), dtype)
+     arr = wp.array(arr_np, dtype=wptype, requires_grad=True, device=device)
+
+     assert_np_equal(arr.numpy(), arr_np, tol=tol)
+
+
+ def test_unary_ops(test, device, dtype, register_kernels=False):
+     rng = np.random.default_rng(123)
+
+     tol = {
+         np.float16: 5.0e-3,
+         np.float32: 1.0e-6,
+         np.float64: 1.0e-8,
+     }.get(dtype, 0)
+
+     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+
+     def check_unary(
+         inputs: wp.array(dtype=wptype, ndim=2),
+         outputs: wp.array(dtype=wptype, ndim=2),
+     ):
+         for i in range(10):
+             i0 = inputs[0, i]
+             i1 = inputs[1, i]
+             i2 = inputs[2, i]
+             i3 = inputs[3, i]
+             i4 = inputs[4, i]
+
+             # multiply outputs by 2 so we've got something to backpropagate:
+             outputs[0, i] = wptype(2.0) * (+i0)
+             outputs[1, i] = wptype(2.0) * (-i1)
+             outputs[2, i] = wptype(2.0) * wp.sign(i2)
+             outputs[3, i] = wptype(2.0) * wp.abs(i3)
+             outputs[4, i] = wptype(2.0) * wp.step(i4)
+
+     kernel = getkernel(check_unary, suffix=dtype.__name__)
+     output_select_kernel = get_select_kernel2(wptype)
+
+     if register_kernels:
+         return
+
+     if dtype in np_float_types:
+         inputs = wp.array(
+             rng.standard_normal(size=(5, 10)).astype(dtype), dtype=wptype, requires_grad=True, device=device
+         )
+     else:
+         inputs = wp.array(
+             rng.integers(-2, high=3, size=(5, 10), dtype=dtype), dtype=wptype, requires_grad=True, device=device
+         )
+     outputs = wp.zeros_like(inputs)
+
+     wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+     assert_np_equal(outputs.numpy()[0], 2 * inputs.numpy()[0], tol=tol)
+     assert_np_equal(outputs.numpy()[1], -2 * inputs.numpy()[1], tol=tol)
+     expected = 2 * np.sign(inputs.numpy()[2])
+     expected[expected == 0] = 2
+     assert_np_equal(outputs.numpy()[2], expected, tol=tol)
+     assert_np_equal(outputs.numpy()[3], 2 * np.abs(inputs.numpy()[3]), tol=tol)
+     assert_np_equal(outputs.numpy()[4], 2 * (1 - np.heaviside(inputs.numpy()[4], 1)), tol=tol)
+
+     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+     if dtype in np_float_types:
+         for i in range(10):
+             # grad of 2x:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected_grads = np.zeros_like(inputs.numpy())
+             expected_grads[0, i] = 2
+             assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
+             tape.zero()
+
+             # grad of -2x:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected_grads = np.zeros_like(inputs.numpy())
+             expected_grads[1, i] = -2
+             assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
+             tape.zero()
+
+             # grad of 2 * sign(x):
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 2, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected_grads = np.zeros_like(inputs.numpy())
+             assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
+             tape.zero()
+
+             # grad of 2 * abs(x):
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 3, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected_grads = np.zeros_like(inputs.numpy())
+             expected_grads[3, i] = 2 * np.sign(inputs.numpy()[3, i])
+             assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
+             tape.zero()
+
+             # grad of 2 * step(x):
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 4, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected_grads = np.zeros_like(inputs.numpy())
+             assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
+             tape.zero()
+
+
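The sign() and step() cases above expect exactly zero gradients because both functions are piecewise constant, while abs() passes the gradient through with the sign of its input. A quick numpy finite-difference illustration of that intuition (not part of the package):

```python
import numpy as np

def fd_grad(f, x, h=1e-4):
    # central finite difference, evaluated away from the discontinuity at 0
    return (f(x + h) - f(x - h)) / (2 * h)

x = 0.7
print(fd_grad(np.sign, x))               # 0.0 -- piecewise constant
print(fd_grad(lambda v: np.abs(v), x))   # 1.0 -- sign(x) for x > 0
print(fd_grad(lambda v: np.abs(v), -x))  # -1.0 -- sign(x) for x < 0
```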
+ def test_nonzero(test, device, dtype, register_kernels=False):
+     rng = np.random.default_rng(123)
+
+     tol = {
+         np.float16: 5.0e-3,
+         np.float32: 1.0e-6,
+         np.float64: 1.0e-8,
+     }.get(dtype, 0)
+
+     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+
+     def check_nonzero(
+         inputs: wp.array(dtype=wptype),
+         outputs: wp.array(dtype=wptype),
+     ):
+         for i in range(10):
+             i0 = inputs[i]
+             outputs[i] = wp.nonzero(i0)
+
+     kernel = getkernel(check_nonzero, suffix=dtype.__name__)
+     output_select_kernel = get_select_kernel(wptype)
+
+     if register_kernels:
+         return
+
+     inputs = wp.array(rng.integers(-2, high=3, size=10).astype(dtype), dtype=wptype, requires_grad=True, device=device)
+     outputs = wp.zeros_like(inputs)
+
+     wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+     assert_np_equal(outputs.numpy(), (inputs.numpy() != 0))
+
+     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+     if dtype in np_float_types:
+         for i in range(10):
+             # grad should just be zero:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected_grads = np.zeros_like(inputs.numpy())
+             assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
+             tape.zero()
+
+
+ def test_binary_ops(test, device, dtype, register_kernels=False):
+     rng = np.random.default_rng(123)
+
+     tol = {
+         np.float16: 5.0e-2,
+         np.float32: 1.0e-6,
+         np.float64: 1.0e-8,
+     }.get(dtype, 0)
+
+     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+
+     def check_binary_ops(
+         in1: wp.array(dtype=wptype, ndim=2),
+         in2: wp.array(dtype=wptype, ndim=2),
+         outputs: wp.array(dtype=wptype, ndim=2),
+     ):
+         for i in range(10):
+             i0 = in1[0, i]
+             i1 = in1[1, i]
+             i2 = in1[2, i]
+             i3 = in1[3, i]
+             i4 = in1[4, i]
+             i5 = in1[5, i]
+             i6 = in1[6, i]
+             i7 = in1[7, i]
+
+             j0 = in2[0, i]
+             j1 = in2[1, i]
+             j2 = in2[2, i]
+             j3 = in2[3, i]
+             j4 = in2[4, i]
+             j5 = in2[5, i]
+             j6 = in2[6, i]
+             j7 = in2[7, i]
+
+             outputs[0, i] = wptype(2) * wp.mul(i0, j0)
+             outputs[1, i] = wptype(2) * wp.div(i1, j1)
+             outputs[2, i] = wptype(2) * wp.add(i2, j2)
+             outputs[3, i] = wptype(2) * wp.sub(i3, j3)
+             outputs[4, i] = wptype(2) * wp.mod(i4, j4)
+             outputs[5, i] = wptype(2) * wp.min(i5, j5)
+             outputs[6, i] = wptype(2) * wp.max(i6, j6)
+             outputs[7, i] = wptype(2) * wp.floordiv(i7, j7)
+
+     kernel = getkernel(check_binary_ops, suffix=dtype.__name__)
+     output_select_kernel = get_select_kernel2(wptype)
+
+     if register_kernels:
+         return
+
+     vals1 = randvals(rng, [8, 10], dtype)
+     if dtype in np_unsigned_int_types:
+         vals2 = vals1 + randvals(rng, [8, 10], dtype)
+     else:
+         vals2 = np.abs(randvals(rng, [8, 10], dtype))
+
+     in1 = wp.array(vals1, dtype=wptype, requires_grad=True, device=device)
+     in2 = wp.array(vals2, dtype=wptype, requires_grad=True, device=device)
+
+     outputs = wp.zeros_like(in1)
+
+     wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
+
+     assert_np_equal(outputs.numpy()[0], 2 * in1.numpy()[0] * in2.numpy()[0], tol=tol)
+     if dtype in np_float_types:
+         assert_np_equal(outputs.numpy()[1], 2 * in1.numpy()[1] / (in2.numpy()[1]), tol=tol)
+     else:
+         assert_np_equal(outputs.numpy()[1], 2 * (in1.numpy()[1] // (in2.numpy()[1])), tol=tol)
+     assert_np_equal(outputs.numpy()[2], 2 * (in1.numpy()[2] + (in2.numpy()[2])), tol=tol)
+     assert_np_equal(outputs.numpy()[3], 2 * (in1.numpy()[3] - (in2.numpy()[3])), tol=tol)
+
+     # wp.mod() uses C-style truncated remainder (the sign follows the dividend),
+     # unlike Python's % operator and np.mod(), which use floored remainder:
+     assert_np_equal(
+         outputs.numpy()[4],
+         2
+         * (
+             (in1.numpy()[4])
+             - (in2.numpy()[4]) * np.sign(in1.numpy()[4]) * np.floor(np.abs(in1.numpy()[4]) / (in2.numpy()[4]))
+         ),
+         tol=tol,
+     )
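The truncated-versus-floored distinction only matters for negative operands: np.fmod() implements the truncated convention that the expected value above encodes, while Python's % and np.mod() floor instead. An illustrative comparison (not part of the diff):

```python
import numpy as np

x, y = -7.0, 3.0
print(np.fmod(x, y))  # -1.0  truncated remainder: sign follows the dividend
print(np.mod(x, y))   #  2.0  floored remainder: sign follows the divisor

# the formula used in the assertion above reproduces the truncated form:
print(x - y * np.sign(x) * np.floor(np.abs(x) / y))  # -1.0
```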
+
+     assert_np_equal(outputs.numpy()[5], 2 * np.minimum(in1.numpy()[5], in2.numpy()[5]), tol=tol)
+     assert_np_equal(outputs.numpy()[6], 2 * np.maximum(in1.numpy()[6], in2.numpy()[6]), tol=tol)
+     assert_np_equal(outputs.numpy()[7], 2 * np.floor_divide(in1.numpy()[7], in2.numpy()[7]), tol=tol)
+
+     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+     if dtype in np_float_types:
+         for i in range(10):
+             # multiplication:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(in1.numpy())
+             expected[0, i] = 2.0 * in2.numpy()[0, i]
+             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
+             expected[0, i] = 2.0 * in1.numpy()[0, i]
+             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # division:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(in1.numpy())
+             expected[1, i] = 2.0 / (in2.numpy()[1, i])
+             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
+             # y = x1/x2
+             # dy/dx2 = -x1/x2^2
+             expected[1, i] = (-2.0) * (in1.numpy()[1, i] / (in2.numpy()[1, i] ** 2))
+             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # addition:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 2, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(in1.numpy())
+             expected[2, i] = 2.0
+             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
+             expected[2, i] = 2.0
+             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # subtraction:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 3, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(in1.numpy())
+             expected[3, i] = 2.0
+             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
+             expected[3, i] = -2.0
+             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # modulus. unless at discontinuities,
+             # d/dx1( x1 % x2 ) == 1
+             # d/dx2( x1 % x2 ) == 0
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 4, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(in1.numpy())
+             expected[4, i] = 2.0
+             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
+             expected[4, i] = 0.0
+             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # min
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 5, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(in1.numpy())
+             expected[5, i] = 2.0 if (in1.numpy()[5, i] < in2.numpy()[5, i]) else 0.0
+             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
+             expected[5, i] = 2.0 if (in2.numpy()[5, i] < in1.numpy()[5, i]) else 0.0
+             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # max
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 6, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(in1.numpy())
+             expected[6, i] = 2.0 if (in1.numpy()[6, i] > in2.numpy()[6, i]) else 0.0
+             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
+             expected[6, i] = 2.0 if (in2.numpy()[6, i] > in1.numpy()[6, i]) else 0.0
+             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # floor_divide: piecewise constant, so both gradients are zero
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 7, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(in1.numpy())
+             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
+             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
+             tape.zero()
+
+
+ def test_special_funcs(test, device, dtype, register_kernels=False):
+     rng = np.random.default_rng(123)
+
+     tol = {
+         np.float16: 1.0e-2,
+         np.float32: 1.0e-6,
+         np.float64: 1.0e-8,
+     }.get(dtype, 0)
+
+     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+
+     def check_special_funcs(
+         inputs: wp.array(dtype=wptype, ndim=2),
+         outputs: wp.array(dtype=wptype, ndim=2),
+     ):
+         # multiply outputs by 2 so we've got something to backpropagate:
+         for i in range(10):
+             outputs[0, i] = wptype(2) * wp.log(inputs[0, i])
+             outputs[1, i] = wptype(2) * wp.log2(inputs[1, i])
+             outputs[2, i] = wptype(2) * wp.log10(inputs[2, i])
+             outputs[3, i] = wptype(2) * wp.exp(inputs[3, i])
+             outputs[4, i] = wptype(2) * wp.atan(inputs[4, i])
+             outputs[5, i] = wptype(2) * wp.sin(inputs[5, i])
+             outputs[6, i] = wptype(2) * wp.cos(inputs[6, i])
+             outputs[7, i] = wptype(2) * wp.sqrt(inputs[7, i])
+             outputs[8, i] = wptype(2) * wp.tan(inputs[8, i])
+             outputs[9, i] = wptype(2) * wp.sinh(inputs[9, i])
+             outputs[10, i] = wptype(2) * wp.cosh(inputs[10, i])
+             outputs[11, i] = wptype(2) * wp.tanh(inputs[11, i])
+             outputs[12, i] = wptype(2) * wp.acos(inputs[12, i])
+             outputs[13, i] = wptype(2) * wp.asin(inputs[13, i])
+             outputs[14, i] = wptype(2) * wp.cbrt(inputs[14, i])
+
+     kernel = getkernel(check_special_funcs, suffix=dtype.__name__)
+     output_select_kernel = get_select_kernel2(wptype)
+
+     if register_kernels:
+         return
+
+     invals = rng.normal(size=(15, 10)).astype(dtype)
+     invals[[0, 1, 2, 7, 14]] = 0.1 + np.abs(invals[[0, 1, 2, 7, 14]])
+     invals[12] = np.clip(invals[12], -0.9, 0.9)
+     invals[13] = np.clip(invals[13], -0.9, 0.9)
+     inputs = wp.array(invals, dtype=wptype, requires_grad=True, device=device)
+     outputs = wp.zeros_like(inputs)
+
+     wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+
+     assert_np_equal(outputs.numpy()[0], 2 * np.log(inputs.numpy()[0]), tol=tol)
+     assert_np_equal(outputs.numpy()[1], 2 * np.log2(inputs.numpy()[1]), tol=tol)
+     assert_np_equal(outputs.numpy()[2], 2 * np.log10(inputs.numpy()[2]), tol=tol)
+     assert_np_equal(outputs.numpy()[3], 2 * np.exp(inputs.numpy()[3]), tol=tol)
+     assert_np_equal(outputs.numpy()[4], 2 * np.arctan(inputs.numpy()[4]), tol=tol)
+     assert_np_equal(outputs.numpy()[5], 2 * np.sin(inputs.numpy()[5]), tol=tol)
+     assert_np_equal(outputs.numpy()[6], 2 * np.cos(inputs.numpy()[6]), tol=tol)
+     assert_np_equal(outputs.numpy()[7], 2 * np.sqrt(inputs.numpy()[7]), tol=tol)
+     assert_np_equal(outputs.numpy()[8], 2 * np.tan(inputs.numpy()[8]), tol=tol)
+     assert_np_equal(outputs.numpy()[9], 2 * np.sinh(inputs.numpy()[9]), tol=tol)
+     assert_np_equal(outputs.numpy()[10], 2 * np.cosh(inputs.numpy()[10]), tol=tol)
+     assert_np_equal(outputs.numpy()[11], 2 * np.tanh(inputs.numpy()[11]), tol=tol)
+     assert_np_equal(outputs.numpy()[12], 2 * np.arccos(inputs.numpy()[12]), tol=tol)
+     assert_np_equal(outputs.numpy()[13], 2 * np.arcsin(inputs.numpy()[13]), tol=tol)
+     assert_np_equal(outputs.numpy()[14], 2 * np.cbrt(inputs.numpy()[14]), tol=tol)
+
+     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+     if dtype in np_float_types:
+         for i in range(10):
+             # log:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(inputs.numpy())
+             expected[0, i] = 2.0 / inputs.numpy()[0, i]
+             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # log2:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(inputs.numpy())
+             expected[1, i] = 2.0 / (inputs.numpy()[1, i] * np.log(2.0))
+             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # log10:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 2, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(inputs.numpy())
+             expected[2, i] = 2.0 / (inputs.numpy()[2, i] * np.log(10.0))
+             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # exp:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 3, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(inputs.numpy())
+             expected[3, i] = outputs.numpy()[3, i]
+             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # arctan:
+             # d/dx atan(x) = 1/(1 + x^2); an earlier version of the adjoint
+             # incorrectly used (1 + x^2) here
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 4, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(inputs.numpy())
+             expected[4, i] = 2.0 / (inputs.numpy()[4, i] ** 2 + 1)
+             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # sin:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 5, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(inputs.numpy())
+             expected[5, i] = np.cos(inputs.numpy()[5, i]) * 2
+             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # cos:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 6, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(inputs.numpy())
+             expected[6, i] = -np.sin(inputs.numpy()[6, i]) * 2.0
+             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # sqrt:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 7, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(inputs.numpy())
+             expected[7, i] = 1.0 / (np.sqrt(inputs.numpy()[7, i]))
+             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # tan:
+             # the adjoint is 1/cos(x)^2; an earlier version zeroed the gradient
+             # whenever cos(x) > 0 instead of only guarding against cos(x) == 0
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 8, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(inputs.numpy())
+             expected[8, i] = 2.0 / (np.cos(inputs.numpy()[8, i]) ** 2)
+             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=200 * tol)
+             tape.zero()
+
+             # sinh:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 9, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(inputs.numpy())
+             expected[9, i] = 2.0 * np.cosh(inputs.numpy()[9, i])
+             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # cosh:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 10, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(inputs.numpy())
+             expected[10, i] = 2.0 * np.sinh(inputs.numpy()[10, i])
+             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # tanh:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 11, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(inputs.numpy())
+             expected[11, i] = 2.0 / (np.cosh(inputs.numpy()[11, i]) ** 2)
+             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # arccos:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 12, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(inputs.numpy())
+             expected[12, i] = -2.0 / np.sqrt(1 - inputs.numpy()[12, i] ** 2)
+             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # arcsin:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 13, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(inputs.numpy())
+             expected[13, i] = 2.0 / np.sqrt(1 - inputs.numpy()[13, i] ** 2)
+             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=6 * tol)
+             tape.zero()
+
+             # cbrt:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 14, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(inputs.numpy())
+             cbrt = np.cbrt(inputs.numpy()[14, i], dtype=np.dtype(dtype))
+             expected[14, i] = (2.0 / 3.0) * (1.0 / (cbrt * cbrt))
+             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
+             tape.zero()
+
+
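Every expected gradient in test_special_funcs is the textbook derivative times the factor of 2 introduced in the kernel. Any of them can be spot-checked against a central finite difference; a small illustrative harness (the fd helper is an assumption for this sketch, not part of the package):

```python
import numpy as np

def fd(f, x, h=1e-6):
    # central finite difference
    return (f(x + h) - f(x - h)) / (2 * h)

x = 0.37
checks = {
    "log":  (np.log,    lambda v: 1 / v),
    "atan": (np.arctan, lambda v: 1 / (1 + v * v)),
    "tan":  (np.tan,    lambda v: 1 / np.cos(v) ** 2),
    "tanh": (np.tanh,   lambda v: 1 / np.cosh(v) ** 2),
    "cbrt": (np.cbrt,   lambda v: 1 / (3 * np.cbrt(v) ** 2)),
}
for name, (f, dfdx) in checks.items():
    assert abs(fd(f, x) - dfdx(x)) < 1e-5, name
```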
+ def test_special_funcs_2arg(test, device, dtype, register_kernels=False):
+     rng = np.random.default_rng(123)
+
+     tol = {
+         np.float16: 1.0e-2,
+         np.float32: 1.0e-6,
+         np.float64: 1.0e-8,
+     }.get(dtype, 0)
+
+     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+
+     def check_special_funcs_2arg(
+         in1: wp.array(dtype=wptype, ndim=2),
+         in2: wp.array(dtype=wptype, ndim=2),
+         outputs: wp.array(dtype=wptype, ndim=2),
+     ):
+         # multiply outputs by 2 so we've got something to backpropagate:
+         for i in range(10):
+             outputs[0, i] = wptype(2) * wp.pow(in1[0, i], in2[0, i])
+             outputs[1, i] = wptype(2) * wp.atan2(in1[1, i], in2[1, i])
+
+     kernel = getkernel(check_special_funcs_2arg, suffix=dtype.__name__)
+     output_select_kernel = get_select_kernel2(wptype)
+
+     if register_kernels:
+         return
+
+     in1 = wp.array(np.abs(randvals(rng, [2, 10], dtype)), dtype=wptype, requires_grad=True, device=device)
+     in2 = wp.array(randvals(rng, [2, 10], dtype), dtype=wptype, requires_grad=True, device=device)
+     outputs = wp.zeros_like(in1)
+
+     wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
+
+     assert_np_equal(outputs.numpy()[0], 2.0 * np.power(in1.numpy()[0], in2.numpy()[0]), tol=tol)
+     assert_np_equal(outputs.numpy()[1], 2.0 * np.arctan2(in1.numpy()[1], in2.numpy()[1]), tol=tol)
+
+     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+     if dtype in np_float_types:
+         for i in range(10):
+             # pow:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
+             tape.backward(loss=out)
+             expected = np.zeros_like(in1.numpy())
+             expected[0, i] = 2.0 * in2.numpy()[0, i] * np.power(in1.numpy()[0, i], in2.numpy()[0, i] - 1)
+             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=5 * tol)
+             expected[0, i] = 2.0 * np.power(in1.numpy()[0, i], in2.numpy()[0, i]) * np.log(in1.numpy()[0, i])
+             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
+             tape.zero()
+
+             # atan2:
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             expected = np.zeros_like(in1.numpy())
+             expected[1, i] = 2.0 * in2.numpy()[1, i] / (in1.numpy()[1, i] ** 2 + in2.numpy()[1, i] ** 2)
+             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
+             expected[1, i] = -2.0 * in1.numpy()[1, i] / (in1.numpy()[1, i] ** 2 + in2.numpy()[1, i] ** 2)
+             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
+             tape.zero()
+
+
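The two-argument gradients above are d/da a^b = b*a^(b-1) and d/db a^b = a^b * ln(a) (hence the strictly positive bases for pow), plus d/dy atan2(y, x) = x/(x^2 + y^2) and d/dx atan2(y, x) = -y/(x^2 + y^2). A numpy cross-check of all four formulas (illustrative only):

```python
import numpy as np

a, b = 1.3, 0.7   # pow: base must be positive for the ln(a) term
y, x = 0.5, -1.2  # atan2 arguments, away from the branch cut

h = 1e-6
fd = lambda f, v: (f(v + h) - f(v - h)) / (2 * h)  # central difference

assert np.isclose(fd(lambda v: v**b, a), b * a ** (b - 1))
assert np.isclose(fd(lambda v: a**v, b), a**b * np.log(a))
assert np.isclose(fd(lambda v: np.arctan2(v, x), y), x / (x * x + y * y))
assert np.isclose(fd(lambda v: np.arctan2(y, v), x), -y / (x * x + y * y))
```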
+ def test_float_to_int(test, device, dtype, register_kernels=False):
+     rng = np.random.default_rng(123)
+
+     tol = {
+         np.float16: 5.0e-3,
+         np.float32: 1.0e-6,
+         np.float64: 1.0e-8,
+     }.get(dtype, 0)
+
+     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+
+     def check_float_to_int(
+         inputs: wp.array(dtype=wptype, ndim=2),
+         outputs: wp.array(dtype=wptype, ndim=2),
+     ):
+         for i in range(10):
+             outputs[0, i] = wp.round(inputs[0, i])
+             outputs[1, i] = wp.rint(inputs[1, i])
+             outputs[2, i] = wp.trunc(inputs[2, i])
+             outputs[3, i] = wp.floor(inputs[3, i])
+             outputs[4, i] = wp.ceil(inputs[4, i])
+             outputs[5, i] = wp.frac(inputs[5, i])
+
+     kernel = getkernel(check_float_to_int, suffix=dtype.__name__)
+     output_select_kernel = get_select_kernel2(wptype)
+
+     if register_kernels:
+         return
+
+     inputs = wp.array(rng.standard_normal(size=(6, 10)).astype(dtype), dtype=wptype, requires_grad=True, device=device)
+     outputs = wp.zeros_like(inputs)
+
+     wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+
+     assert_np_equal(outputs.numpy()[0], np.round(inputs.numpy()[0]))
+     assert_np_equal(outputs.numpy()[1], np.rint(inputs.numpy()[1]))
+     assert_np_equal(outputs.numpy()[2], np.trunc(inputs.numpy()[2]))
+     assert_np_equal(outputs.numpy()[3], np.floor(inputs.numpy()[3]))
+     assert_np_equal(outputs.numpy()[4], np.ceil(inputs.numpy()[4]))
+     assert_np_equal(outputs.numpy()[5], np.modf(inputs.numpy()[5])[0])
+
+     # the gradients of round/rint/trunc/floor/ceil should all be zero, as these
+     # functions are piecewise constant (frac is deliberately excluded from the
+     # loop below: its derivative is 1 almost everywhere):
+
+     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+     for i in range(10):
+         for j in range(5):
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, j, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             assert_np_equal(tape.gradients[inputs].numpy(), np.zeros_like(inputs.numpy()), tol=tol)
+             tape.zero()
+
+
+ def test_infinity(test, device, dtype, register_kernels=False):
+     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+
+     def check_infinity(
+         outputs: wp.array(dtype=wptype),
+     ):
+         outputs[0] = wptype(wp.inf)
+         outputs[1] = wptype(-wp.inf)
+         outputs[2] = wptype(2.0 * wp.inf)
+         outputs[3] = wptype(-2.0 * wp.inf)
+         outputs[4] = wptype(2.0 / 0.0)
+         outputs[5] = wptype(-2.0 / 0.0)
+
+     kernel = getkernel(check_infinity, suffix=dtype.__name__)
+
+     if register_kernels:
+         return
+
+     outputs = wp.zeros(6, dtype=wptype, device=device)
+
+     wp.launch(kernel, dim=1, inputs=[], outputs=[outputs], device=device)
+
+     test.assertEqual(outputs.numpy()[0], math.inf)
+     test.assertEqual(outputs.numpy()[1], -math.inf)
+     test.assertEqual(outputs.numpy()[2], math.inf)
+     test.assertEqual(outputs.numpy()[3], -math.inf)
+     test.assertEqual(outputs.numpy()[4], math.inf)
+     test.assertEqual(outputs.numpy()[5], -math.inf)
+
+
+ def test_interp(test, device, dtype, register_kernels=False):
+     rng = np.random.default_rng(123)
+
+     tol = {
+         np.float16: 1.0e-2,
+         np.float32: 5.0e-6,
+         np.float64: 1.0e-8,
+     }.get(dtype, 0)
+
+     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+
+     def check_interp(
+         in1: wp.array(dtype=wptype, ndim=2),
+         in2: wp.array(dtype=wptype, ndim=2),
+         in3: wp.array(dtype=wptype, ndim=2),
+         outputs: wp.array(dtype=wptype, ndim=2),
+     ):
+         # multiply outputs by 2 so we've got something to backpropagate:
+         for i in range(10):
+             outputs[0, i] = wptype(2) * wp.smoothstep(in1[0, i], in2[0, i], in3[0, i])
+             outputs[1, i] = wptype(2) * wp.lerp(in1[1, i], in2[1, i], in3[1, i])
+
+     kernel = getkernel(check_interp, suffix=dtype.__name__)
+     output_select_kernel = get_select_kernel2(wptype)
+
+     if register_kernels:
+         return
+
+     e0 = randvals(rng, [2, 10], dtype)
+     e1 = e0 + randvals(rng, [2, 10], dtype) + 0.1
+     in1 = wp.array(e0, dtype=wptype, requires_grad=True, device=device)
+     in2 = wp.array(e1, dtype=wptype, requires_grad=True, device=device)
+     in3 = wp.array(randvals(rng, [2, 10], dtype), dtype=wptype, requires_grad=True, device=device)
+
+     outputs = wp.zeros_like(in1)
+
+     wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
+
+     edge0 = in1.numpy()[0]
+     edge1 = in2.numpy()[0]
+     t_smoothstep = in3.numpy()[0]
+     x = np.clip((t_smoothstep - edge0) / (edge1 - edge0), 0, 1)
+     smoothstep_expected = 2.0 * x * x * (3 - 2 * x)
+
+     assert_np_equal(outputs.numpy()[0], smoothstep_expected, tol=tol)
+
+     a = in1.numpy()[1]
+     b = in2.numpy()[1]
+     t = in3.numpy()[1]
+     assert_np_equal(outputs.numpy()[1], 2.0 * (a * (1 - t) + b * t), tol=tol)
+
+     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+     if dtype in np_float_types:
+         for i in range(10):
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
+             tape.backward(loss=out)
+
+             # e0 = in1
+             # e1 = in2
+             # t = in3
+
+             # x = clamp((t - e0) / (e1 - e0), 0,1)
+             # dx/dt = 1 / (e1 - e0) if e0 < t < e1 else 0
+
+             # y = x * x * (3 - 2 * x)
+
+             # y = 3 * x * x - 2 * x * x * x
+             # dy/dx = 6 * ( x - x^2 )
+             dydx = 6 * x * (1 - x)
+
+             # dy/in1 = dy/dx dx/de0 de0/din1
+             dxde0 = (t_smoothstep - edge1) / ((edge1 - edge0) ** 2)
+             dxde0[x == 0] = 0
+             dxde0[x == 1] = 0
+
+             expected_grads = np.zeros_like(in1.numpy())
+             expected_grads[0, i] = 2.0 * dydx[i] * dxde0[i]
+             assert_np_equal(tape.gradients[in1].numpy(), expected_grads, tol=tol)
+
+             # dy/in2 = dy/dx dx/de1 de1/din2
+             dxde1 = (edge0 - t_smoothstep) / ((edge1 - edge0) ** 2)
+             dxde1[x == 0] = 0
+             dxde1[x == 1] = 0
+
+             expected_grads = np.zeros_like(in1.numpy())
+             expected_grads[0, i] = 2.0 * dydx[i] * dxde1[i]
+             assert_np_equal(tape.gradients[in2].numpy(), expected_grads, tol=tol)
+
+             # dy/in3 = dy/dx dx/dt dt/din3
+             dxdt = 1.0 / (edge1 - edge0)
+             dxdt[x == 0] = 0
+             dxdt[x == 1] = 0
+
+             expected_grads = np.zeros_like(in1.numpy())
+             expected_grads[0, i] = 2.0 * dydx[i] * dxdt[i]
+             assert_np_equal(tape.gradients[in3].numpy(), expected_grads, tol=tol)
+             tape.zero()
+
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
+             tape.backward(loss=out)
+
+             # y = a*(1-t) + b*t
+             # a = in1
+             # b = in2
+             # t = in3
+
+             # y = in1*( 1 - in3 ) + in2*in3
+
+             # dy/din1 = (1-in3)
+             expected_grads = np.zeros_like(in1.numpy())
+             expected_grads[1, i] = 2.0 * (1 - in3.numpy()[1, i])
+             assert_np_equal(tape.gradients[in1].numpy(), expected_grads, tol=tol)
+
+             # dy/din2 = in3
+             expected_grads = np.zeros_like(in1.numpy())
+             expected_grads[1, i] = 2.0 * in3.numpy()[1, i]
+             assert_np_equal(tape.gradients[in2].numpy(), expected_grads, tol=tol)
+
+             # dy/din3 = in2 - in1
+             expected_grads = np.zeros_like(in1.numpy())
+             expected_grads[1, i] = 2.0 * (in2.numpy()[1, i] - in1.numpy()[1, i])
+             assert_np_equal(tape.gradients[in3].numpy(), expected_grads, tol=tol)
+             tape.zero()
+
+
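All of the smoothstep gradients above come from one chain rule: y = x^2 (3 - 2x) with x = clamp((t - e0)/(e1 - e0), 0, 1), so dy/dx = 6x(1 - x), and the clamp zeroes every partial derivative outside (e0, e1). A standalone finite-difference sanity check of the dx/dt branch (illustrative, not part of the package):

```python
import numpy as np

def smoothstep(e0, e1, t):
    x = np.clip((t - e0) / (e1 - e0), 0.0, 1.0)
    return x * x * (3.0 - 2.0 * x)

e0, e1, t = 0.0, 2.0, 0.8  # interior point: e0 < t < e1
h = 1e-6
fd = (smoothstep(e0, e1, t + h) - smoothstep(e0, e1, t - h)) / (2 * h)

x = (t - e0) / (e1 - e0)
analytic = 6.0 * x * (1.0 - x) / (e1 - e0)  # dy/dx * dx/dt
assert np.isclose(fd, analytic)

# outside the edges the clamp saturates and the derivative vanishes
fd_outside = (smoothstep(e0, e1, 3.0 + h) - smoothstep(e0, e1, 3.0 - h)) / (2 * h)
assert fd_outside == 0.0
```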
+ def test_clamp(test, device, dtype, register_kernels=False):
+     rng = np.random.default_rng(123)
+
+     tol = {
+         np.float16: 5.0e-3,
+         np.float32: 1.0e-6,
+         np.float64: 1.0e-6,
+     }.get(dtype, 0)
+
+     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+
+     def check_clamp(
+         in1: wp.array(dtype=wptype),
+         in2: wp.array(dtype=wptype),
+         in3: wp.array(dtype=wptype),
+         outputs: wp.array(dtype=wptype),
+     ):
+         for i in range(100):
+             # multiply output by 2 so we've got something to backpropagate:
+             outputs[i] = wptype(2) * wp.clamp(in1[i], in2[i], in3[i])
+
+     kernel = getkernel(check_clamp, suffix=dtype.__name__)
+     output_select_kernel = get_select_kernel(wptype)
+
+     if register_kernels:
+         return
+
+     in1 = wp.array(randvals(rng, [100], dtype), dtype=wptype, requires_grad=True, device=device)
+     starts = randvals(rng, [100], dtype)
+     diffs = np.abs(randvals(rng, [100], dtype))
+     in2 = wp.array(starts, dtype=wptype, requires_grad=True, device=device)
+     in3 = wp.array(starts + diffs, dtype=wptype, requires_grad=True, device=device)
+     outputs = wp.zeros_like(in1)
+
+     wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
+
+     assert_np_equal(2 * np.clip(in1.numpy(), in2.numpy(), in3.numpy()), outputs.numpy(), tol=tol)
+
+     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+     if dtype in np_float_types:
+         for i in range(100):
+             tape = wp.Tape()
+             with tape:
+                 wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
+                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[out], device=device)
+
+             tape.backward(loss=out)
+             t = in1.numpy()[i]
+             lower = in2.numpy()[i]
+             upper = in3.numpy()[i]
+             expected = np.zeros_like(in1.numpy())
+             if t < lower:
+                 expected[i] = 2.0
+                 assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
+                 expected[i] = 0.0
+                 assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
+                 assert_np_equal(tape.gradients[in3].numpy(), expected, tol=tol)
+             elif t > upper:
+                 expected[i] = 2.0
+                 assert_np_equal(tape.gradients[in3].numpy(), expected, tol=tol)
+                 expected[i] = 0.0
+                 assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
+                 assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
+             else:
+                 expected[i] = 2.0
+                 assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
+                 expected[i] = 0.0
+                 assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
+                 assert_np_equal(tape.gradients[in3].numpy(), expected, tol=tol)
+
+             tape.zero()
+
+
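wp.clamp() routes the gradient to exactly one of its three arguments: to the lower bound when x < lower, to the upper bound when x > upper, and to x itself otherwise, which is what the three assertion branches above encode. The same routing logic in plain Python (illustrative only):

```python
def clamp_grads(x, lower, upper):
    # returns (d/dx, d/dlower, d/dupper) of clamp(x, lower, upper)
    if x < lower:
        return 0.0, 1.0, 0.0  # output is the lower bound
    if x > upper:
        return 0.0, 0.0, 1.0  # output is the upper bound
    return 1.0, 0.0, 0.0      # output is x itself

print(clamp_grads(-3.0, 0.0, 1.0))  # (0.0, 1.0, 0.0)
print(clamp_grads(0.5, 0.0, 1.0))   # (1.0, 0.0, 0.0)
print(clamp_grads(7.0, 0.0, 1.0))   # (0.0, 0.0, 1.0)
```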
+ devices = get_test_devices()
+
+
+ class TestArithmetic(unittest.TestCase):
+     pass
+
+
+ # these unary ops only make sense for signed values:
+ for dtype in np_signed_int_types + np_float_types:
+     add_function_test_register_kernel(
+         TestArithmetic, f"test_unary_ops_{dtype.__name__}", test_unary_ops, devices=devices, dtype=dtype
+     )
+
+ for dtype in np_float_types:
+     add_function_test_register_kernel(
+         TestArithmetic, f"test_special_funcs_{dtype.__name__}", test_special_funcs, devices=devices, dtype=dtype
+     )
+     add_function_test_register_kernel(
+         TestArithmetic,
+         f"test_special_funcs_2arg_{dtype.__name__}",
+         test_special_funcs_2arg,
+         devices=devices,
+         dtype=dtype,
+     )
+     add_function_test_register_kernel(
+         TestArithmetic, f"test_interp_{dtype.__name__}", test_interp, devices=devices, dtype=dtype
+     )
+     add_function_test_register_kernel(
+         TestArithmetic, f"test_float_to_int_{dtype.__name__}", test_float_to_int, devices=devices, dtype=dtype
+     )
+     add_function_test_register_kernel(
+         TestArithmetic, f"test_infinity_{dtype.__name__}", test_infinity, devices=devices, dtype=dtype
+     )
+
+ for dtype in np_scalar_types:
+     add_function_test_register_kernel(
+         TestArithmetic, f"test_clamp_{dtype.__name__}", test_clamp, devices=devices, dtype=dtype
+     )
+     add_function_test_register_kernel(
+         TestArithmetic, f"test_nonzero_{dtype.__name__}", test_nonzero, devices=devices, dtype=dtype
+     )
+     add_function_test(TestArithmetic, f"test_arrays_{dtype.__name__}", test_arrays, devices=devices, dtype=dtype)
+     add_function_test_register_kernel(
+         TestArithmetic, f"test_binary_ops_{dtype.__name__}", test_binary_ops, devices=devices, dtype=dtype
+     )
+
+
+ if __name__ == "__main__":
+     wp.build.clear_kernel_cache()
+     unittest.main(verbosity=2, failfast=False)