warp-lang 1.0.2-py3-none-win_amd64.whl → 1.2.0-py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of warp-lang might be problematic.
Files changed (356)
  1. warp/__init__.py +108 -97
  2. warp/__init__.pyi +1 -1
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +88 -113
  6. warp/build_dll.py +383 -375
  7. warp/builtins.py +3693 -3354
  8. warp/codegen.py +2925 -2792
  9. warp/config.py +40 -36
  10. warp/constants.py +49 -45
  11. warp/context.py +5409 -5102
  12. warp/dlpack.py +442 -442
  13. warp/examples/__init__.py +16 -16
  14. warp/examples/assets/bear.usd +0 -0
  15. warp/examples/assets/bunny.usd +0 -0
  16. warp/examples/assets/cartpole.urdf +110 -110
  17. warp/examples/assets/crazyflie.usd +0 -0
  18. warp/examples/assets/cube.usd +0 -0
  19. warp/examples/assets/nv_ant.xml +92 -92
  20. warp/examples/assets/nv_humanoid.xml +183 -183
  21. warp/examples/assets/quadruped.urdf +267 -267
  22. warp/examples/assets/rocks.nvdb +0 -0
  23. warp/examples/assets/rocks.usd +0 -0
  24. warp/examples/assets/sphere.usd +0 -0
  25. warp/examples/benchmarks/benchmark_api.py +381 -383
  26. warp/examples/benchmarks/benchmark_cloth.py +278 -277
  27. warp/examples/benchmarks/benchmark_cloth_cupy.py +88 -88
  28. warp/examples/benchmarks/benchmark_cloth_jax.py +97 -100
  29. warp/examples/benchmarks/benchmark_cloth_numba.py +146 -142
  30. warp/examples/benchmarks/benchmark_cloth_numpy.py +77 -77
  31. warp/examples/benchmarks/benchmark_cloth_pytorch.py +86 -86
  32. warp/examples/benchmarks/benchmark_cloth_taichi.py +112 -112
  33. warp/examples/benchmarks/benchmark_cloth_warp.py +145 -146
  34. warp/examples/benchmarks/benchmark_launches.py +293 -295
  35. warp/examples/browse.py +29 -29
  36. warp/examples/core/example_dem.py +232 -219
  37. warp/examples/core/example_fluid.py +291 -267
  38. warp/examples/core/example_graph_capture.py +142 -126
  39. warp/examples/core/example_marching_cubes.py +186 -174
  40. warp/examples/core/example_mesh.py +172 -155
  41. warp/examples/core/example_mesh_intersect.py +203 -193
  42. warp/examples/core/example_nvdb.py +174 -170
  43. warp/examples/core/example_raycast.py +103 -90
  44. warp/examples/core/example_raymarch.py +197 -178
  45. warp/examples/core/example_render_opengl.py +183 -141
  46. warp/examples/core/example_sph.py +403 -387
  47. warp/examples/core/example_torch.py +219 -181
  48. warp/examples/core/example_wave.py +261 -248
  49. warp/examples/fem/bsr_utils.py +378 -380
  50. warp/examples/fem/example_apic_fluid.py +432 -389
  51. warp/examples/fem/example_burgers.py +262 -0
  52. warp/examples/fem/example_convection_diffusion.py +180 -168
  53. warp/examples/fem/example_convection_diffusion_dg.py +217 -209
  54. warp/examples/fem/example_deformed_geometry.py +175 -159
  55. warp/examples/fem/example_diffusion.py +199 -173
  56. warp/examples/fem/example_diffusion_3d.py +178 -152
  57. warp/examples/fem/example_diffusion_mgpu.py +219 -214
  58. warp/examples/fem/example_mixed_elasticity.py +242 -222
  59. warp/examples/fem/example_navier_stokes.py +257 -243
  60. warp/examples/fem/example_stokes.py +218 -192
  61. warp/examples/fem/example_stokes_transfer.py +263 -249
  62. warp/examples/fem/mesh_utils.py +133 -109
  63. warp/examples/fem/plot_utils.py +292 -287
  64. warp/examples/optim/example_bounce.py +258 -246
  65. warp/examples/optim/example_cloth_throw.py +220 -209
  66. warp/examples/optim/example_diffray.py +564 -536
  67. warp/examples/optim/example_drone.py +862 -835
  68. warp/examples/optim/example_inverse_kinematics.py +174 -168
  69. warp/examples/optim/example_inverse_kinematics_torch.py +183 -169
  70. warp/examples/optim/example_spring_cage.py +237 -231
  71. warp/examples/optim/example_trajectory.py +221 -199
  72. warp/examples/optim/example_walker.py +304 -293
  73. warp/examples/sim/example_cartpole.py +137 -129
  74. warp/examples/sim/example_cloth.py +194 -186
  75. warp/examples/sim/example_granular.py +122 -111
  76. warp/examples/sim/example_granular_collision_sdf.py +195 -186
  77. warp/examples/sim/example_jacobian_ik.py +234 -214
  78. warp/examples/sim/example_particle_chain.py +116 -105
  79. warp/examples/sim/example_quadruped.py +191 -180
  80. warp/examples/sim/example_rigid_chain.py +195 -187
  81. warp/examples/sim/example_rigid_contact.py +187 -177
  82. warp/examples/sim/example_rigid_force.py +125 -125
  83. warp/examples/sim/example_rigid_gyroscopic.py +107 -95
  84. warp/examples/sim/example_rigid_soft_contact.py +132 -122
  85. warp/examples/sim/example_soft_body.py +188 -177
  86. warp/fabric.py +337 -335
  87. warp/fem/__init__.py +61 -27
  88. warp/fem/cache.py +403 -388
  89. warp/fem/dirichlet.py +178 -179
  90. warp/fem/domain.py +262 -263
  91. warp/fem/field/__init__.py +100 -101
  92. warp/fem/field/field.py +148 -149
  93. warp/fem/field/nodal_field.py +298 -299
  94. warp/fem/field/restriction.py +22 -21
  95. warp/fem/field/test.py +180 -181
  96. warp/fem/field/trial.py +183 -183
  97. warp/fem/geometry/__init__.py +16 -19
  98. warp/fem/geometry/closest_point.py +69 -70
  99. warp/fem/geometry/deformed_geometry.py +270 -271
  100. warp/fem/geometry/element.py +748 -744
  101. warp/fem/geometry/geometry.py +184 -186
  102. warp/fem/geometry/grid_2d.py +380 -373
  103. warp/fem/geometry/grid_3d.py +437 -435
  104. warp/fem/geometry/hexmesh.py +953 -953
  105. warp/fem/geometry/nanogrid.py +455 -0
  106. warp/fem/geometry/partition.py +374 -376
  107. warp/fem/geometry/quadmesh_2d.py +532 -532
  108. warp/fem/geometry/tetmesh.py +840 -840
  109. warp/fem/geometry/trimesh_2d.py +577 -577
  110. warp/fem/integrate.py +1684 -1615
  111. warp/fem/operator.py +190 -191
  112. warp/fem/polynomial.py +214 -213
  113. warp/fem/quadrature/__init__.py +2 -2
  114. warp/fem/quadrature/pic_quadrature.py +243 -245
  115. warp/fem/quadrature/quadrature.py +295 -294
  116. warp/fem/space/__init__.py +179 -292
  117. warp/fem/space/basis_space.py +522 -489
  118. warp/fem/space/collocated_function_space.py +100 -105
  119. warp/fem/space/dof_mapper.py +236 -236
  120. warp/fem/space/function_space.py +148 -145
  121. warp/fem/space/grid_2d_function_space.py +148 -267
  122. warp/fem/space/grid_3d_function_space.py +167 -306
  123. warp/fem/space/hexmesh_function_space.py +253 -352
  124. warp/fem/space/nanogrid_function_space.py +202 -0
  125. warp/fem/space/partition.py +350 -350
  126. warp/fem/space/quadmesh_2d_function_space.py +261 -369
  127. warp/fem/space/restriction.py +161 -160
  128. warp/fem/space/shape/__init__.py +90 -15
  129. warp/fem/space/shape/cube_shape_function.py +728 -738
  130. warp/fem/space/shape/shape_function.py +102 -103
  131. warp/fem/space/shape/square_shape_function.py +611 -611
  132. warp/fem/space/shape/tet_shape_function.py +565 -567
  133. warp/fem/space/shape/triangle_shape_function.py +429 -429
  134. warp/fem/space/tetmesh_function_space.py +224 -292
  135. warp/fem/space/topology.py +297 -295
  136. warp/fem/space/trimesh_2d_function_space.py +153 -221
  137. warp/fem/types.py +77 -77
  138. warp/fem/utils.py +495 -495
  139. warp/jax.py +166 -141
  140. warp/jax_experimental.py +341 -339
  141. warp/native/array.h +1081 -1025
  142. warp/native/builtin.h +1603 -1560
  143. warp/native/bvh.cpp +402 -398
  144. warp/native/bvh.cu +533 -525
  145. warp/native/bvh.h +430 -429
  146. warp/native/clang/clang.cpp +496 -464
  147. warp/native/crt.cpp +42 -32
  148. warp/native/crt.h +352 -335
  149. warp/native/cuda_crt.h +1049 -1049
  150. warp/native/cuda_util.cpp +549 -540
  151. warp/native/cuda_util.h +288 -203
  152. warp/native/cutlass_gemm.cpp +34 -34
  153. warp/native/cutlass_gemm.cu +372 -372
  154. warp/native/error.cpp +66 -66
  155. warp/native/error.h +27 -27
  156. warp/native/exports.h +187 -0
  157. warp/native/fabric.h +228 -228
  158. warp/native/hashgrid.cpp +301 -278
  159. warp/native/hashgrid.cu +78 -77
  160. warp/native/hashgrid.h +227 -227
  161. warp/native/initializer_array.h +32 -32
  162. warp/native/intersect.h +1204 -1204
  163. warp/native/intersect_adj.h +365 -365
  164. warp/native/intersect_tri.h +322 -322
  165. warp/native/marching.cpp +2 -2
  166. warp/native/marching.cu +497 -497
  167. warp/native/marching.h +2 -2
  168. warp/native/mat.h +1545 -1498
  169. warp/native/matnn.h +333 -333
  170. warp/native/mesh.cpp +203 -203
  171. warp/native/mesh.cu +292 -293
  172. warp/native/mesh.h +1887 -1887
  173. warp/native/nanovdb/GridHandle.h +366 -0
  174. warp/native/nanovdb/HostBuffer.h +590 -0
  175. warp/native/nanovdb/NanoVDB.h +6624 -4782
  176. warp/native/nanovdb/PNanoVDB.h +3390 -2553
  177. warp/native/noise.h +850 -850
  178. warp/native/quat.h +1112 -1085
  179. warp/native/rand.h +303 -299
  180. warp/native/range.h +108 -108
  181. warp/native/reduce.cpp +156 -156
  182. warp/native/reduce.cu +348 -348
  183. warp/native/runlength_encode.cpp +61 -61
  184. warp/native/runlength_encode.cu +46 -46
  185. warp/native/scan.cpp +30 -30
  186. warp/native/scan.cu +36 -36
  187. warp/native/scan.h +7 -7
  188. warp/native/solid_angle.h +442 -442
  189. warp/native/sort.cpp +94 -94
  190. warp/native/sort.cu +97 -97
  191. warp/native/sort.h +14 -14
  192. warp/native/sparse.cpp +337 -337
  193. warp/native/sparse.cu +544 -544
  194. warp/native/spatial.h +630 -630
  195. warp/native/svd.h +562 -562
  196. warp/native/temp_buffer.h +30 -30
  197. warp/native/vec.h +1177 -1133
  198. warp/native/volume.cpp +529 -297
  199. warp/native/volume.cu +58 -32
  200. warp/native/volume.h +960 -538
  201. warp/native/volume_builder.cu +446 -425
  202. warp/native/volume_builder.h +34 -19
  203. warp/native/volume_impl.h +61 -0
  204. warp/native/warp.cpp +1057 -1052
  205. warp/native/warp.cu +2949 -2828
  206. warp/native/warp.h +321 -305
  207. warp/optim/__init__.py +9 -9
  208. warp/optim/adam.py +120 -120
  209. warp/optim/linear.py +1104 -939
  210. warp/optim/sgd.py +104 -92
  211. warp/render/__init__.py +10 -10
  212. warp/render/render_opengl.py +3356 -3204
  213. warp/render/render_usd.py +768 -749
  214. warp/render/utils.py +152 -150
  215. warp/sim/__init__.py +52 -59
  216. warp/sim/articulation.py +685 -685
  217. warp/sim/collide.py +1594 -1590
  218. warp/sim/import_mjcf.py +489 -481
  219. warp/sim/import_snu.py +220 -221
  220. warp/sim/import_urdf.py +536 -516
  221. warp/sim/import_usd.py +887 -881
  222. warp/sim/inertia.py +316 -317
  223. warp/sim/integrator.py +234 -233
  224. warp/sim/integrator_euler.py +1956 -1956
  225. warp/sim/integrator_featherstone.py +1917 -1991
  226. warp/sim/integrator_xpbd.py +3288 -3312
  227. warp/sim/model.py +4473 -4314
  228. warp/sim/particles.py +113 -112
  229. warp/sim/render.py +417 -403
  230. warp/sim/utils.py +413 -410
  231. warp/sparse.py +1289 -1227
  232. warp/stubs.py +2192 -2469
  233. warp/tape.py +1162 -225
  234. warp/tests/__init__.py +1 -1
  235. warp/tests/__main__.py +4 -4
  236. warp/tests/assets/test_index_grid.nvdb +0 -0
  237. warp/tests/assets/torus.usda +105 -105
  238. warp/tests/aux_test_class_kernel.py +26 -26
  239. warp/tests/aux_test_compile_consts_dummy.py +10 -10
  240. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -21
  241. warp/tests/aux_test_dependent.py +20 -22
  242. warp/tests/aux_test_grad_customs.py +21 -23
  243. warp/tests/aux_test_reference.py +9 -11
  244. warp/tests/aux_test_reference_reference.py +8 -10
  245. warp/tests/aux_test_square.py +15 -17
  246. warp/tests/aux_test_unresolved_func.py +14 -14
  247. warp/tests/aux_test_unresolved_symbol.py +14 -14
  248. warp/tests/disabled_kinematics.py +237 -239
  249. warp/tests/run_coverage_serial.py +31 -31
  250. warp/tests/test_adam.py +155 -157
  251. warp/tests/test_arithmetic.py +1088 -1124
  252. warp/tests/test_array.py +2415 -2326
  253. warp/tests/test_array_reduce.py +148 -150
  254. warp/tests/test_async.py +666 -656
  255. warp/tests/test_atomic.py +139 -141
  256. warp/tests/test_bool.py +212 -149
  257. warp/tests/test_builtins_resolution.py +1290 -1292
  258. warp/tests/test_bvh.py +162 -171
  259. warp/tests/test_closest_point_edge_edge.py +227 -228
  260. warp/tests/test_codegen.py +562 -553
  261. warp/tests/test_compile_consts.py +217 -101
  262. warp/tests/test_conditional.py +244 -246
  263. warp/tests/test_copy.py +230 -215
  264. warp/tests/test_ctypes.py +630 -632
  265. warp/tests/test_dense.py +65 -67
  266. warp/tests/test_devices.py +89 -98
  267. warp/tests/test_dlpack.py +528 -529
  268. warp/tests/test_examples.py +403 -378
  269. warp/tests/test_fabricarray.py +952 -955
  270. warp/tests/test_fast_math.py +60 -54
  271. warp/tests/test_fem.py +1298 -1278
  272. warp/tests/test_fp16.py +128 -130
  273. warp/tests/test_func.py +336 -337
  274. warp/tests/test_generics.py +596 -571
  275. warp/tests/test_grad.py +885 -640
  276. warp/tests/test_grad_customs.py +331 -336
  277. warp/tests/test_hash_grid.py +208 -164
  278. warp/tests/test_import.py +37 -39
  279. warp/tests/test_indexedarray.py +1132 -1134
  280. warp/tests/test_intersect.py +65 -67
  281. warp/tests/test_jax.py +305 -307
  282. warp/tests/test_large.py +169 -164
  283. warp/tests/test_launch.py +352 -354
  284. warp/tests/test_lerp.py +217 -261
  285. warp/tests/test_linear_solvers.py +189 -171
  286. warp/tests/test_lvalue.py +419 -493
  287. warp/tests/test_marching_cubes.py +63 -65
  288. warp/tests/test_mat.py +1799 -1827
  289. warp/tests/test_mat_lite.py +113 -115
  290. warp/tests/test_mat_scalar_ops.py +2905 -2889
  291. warp/tests/test_math.py +124 -193
  292. warp/tests/test_matmul.py +498 -499
  293. warp/tests/test_matmul_lite.py +408 -410
  294. warp/tests/test_mempool.py +186 -190
  295. warp/tests/test_mesh.py +281 -324
  296. warp/tests/test_mesh_query_aabb.py +226 -241
  297. warp/tests/test_mesh_query_point.py +690 -702
  298. warp/tests/test_mesh_query_ray.py +290 -303
  299. warp/tests/test_mlp.py +274 -276
  300. warp/tests/test_model.py +108 -110
  301. warp/tests/test_module_hashing.py +111 -0
  302. warp/tests/test_modules_lite.py +36 -39
  303. warp/tests/test_multigpu.py +161 -163
  304. warp/tests/test_noise.py +244 -248
  305. warp/tests/test_operators.py +248 -250
  306. warp/tests/test_options.py +121 -125
  307. warp/tests/test_peer.py +131 -137
  308. warp/tests/test_pinned.py +76 -78
  309. warp/tests/test_print.py +52 -54
  310. warp/tests/test_quat.py +2084 -2086
  311. warp/tests/test_rand.py +324 -288
  312. warp/tests/test_reload.py +207 -217
  313. warp/tests/test_rounding.py +177 -179
  314. warp/tests/test_runlength_encode.py +188 -190
  315. warp/tests/test_sim_grad.py +241 -0
  316. warp/tests/test_sim_kinematics.py +89 -97
  317. warp/tests/test_smoothstep.py +166 -168
  318. warp/tests/test_snippet.py +303 -266
  319. warp/tests/test_sparse.py +466 -460
  320. warp/tests/test_spatial.py +2146 -2148
  321. warp/tests/test_special_values.py +362 -0
  322. warp/tests/test_streams.py +484 -473
  323. warp/tests/test_struct.py +708 -675
  324. warp/tests/test_tape.py +171 -148
  325. warp/tests/test_torch.py +741 -743
  326. warp/tests/test_transient_module.py +85 -87
  327. warp/tests/test_types.py +554 -659
  328. warp/tests/test_utils.py +488 -499
  329. warp/tests/test_vec.py +1262 -1268
  330. warp/tests/test_vec_lite.py +71 -73
  331. warp/tests/test_vec_scalar_ops.py +2097 -2099
  332. warp/tests/test_verify_fp.py +92 -94
  333. warp/tests/test_volume.py +961 -736
  334. warp/tests/test_volume_write.py +338 -265
  335. warp/tests/unittest_serial.py +38 -37
  336. warp/tests/unittest_suites.py +367 -359
  337. warp/tests/unittest_utils.py +434 -578
  338. warp/tests/unused_test_misc.py +69 -71
  339. warp/tests/walkthrough_debug.py +85 -85
  340. warp/thirdparty/appdirs.py +598 -598
  341. warp/thirdparty/dlpack.py +143 -143
  342. warp/thirdparty/unittest_parallel.py +563 -561
  343. warp/torch.py +321 -295
  344. warp/types.py +4941 -4450
  345. warp/utils.py +1008 -821
  346. {warp_lang-1.0.2.dist-info → warp_lang-1.2.0.dist-info}/LICENSE.md +126 -126
  347. {warp_lang-1.0.2.dist-info → warp_lang-1.2.0.dist-info}/METADATA +365 -400
  348. warp_lang-1.2.0.dist-info/RECORD +359 -0
  349. warp/examples/assets/cube.usda +0 -42
  350. warp/examples/assets/sphere.usda +0 -56
  351. warp/examples/assets/torus.usda +0 -105
  352. warp/examples/fem/example_convection_diffusion_dg0.py +0 -194
  353. warp/native/nanovdb/PNanoVDBWrite.h +0 -295
  354. warp_lang-1.0.2.dist-info/RECORD +0 -352
  355. {warp_lang-1.0.2.dist-info → warp_lang-1.2.0.dist-info}/WHEEL +0 -0
  356. {warp_lang-1.0.2.dist-info → warp_lang-1.2.0.dist-info}/top_level.txt +0 -0
warp/tests/test_arithmetic.py
@@ -1,1124 +1,1088 @@
- # Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
- # NVIDIA CORPORATION and its licensors retain all intellectual property
- # and proprietary rights in and to this software, related documentation
- # and any modifications thereto. Any use, reproduction, disclosure or
- # distribution of this software and related documentation without an express
- # license agreement from NVIDIA CORPORATION is strictly prohibited.
-
- import math
- import unittest
-
- import numpy as np
-
- import warp as wp
- from warp.tests.unittest_utils import *
-
- wp.init()
-
- np_signed_int_types = [
-     np.int8,
-     np.int16,
-     np.int32,
-     np.int64,
-     np.byte,
- ]
-
- np_unsigned_int_types = [
-     np.uint8,
-     np.uint16,
-     np.uint32,
-     np.uint64,
-     np.ubyte,
- ]
-
- np_int_types = np_signed_int_types + np_unsigned_int_types
-
- np_float_types = [np.float16, np.float32, np.float64]
-
- np_scalar_types = np_int_types + np_float_types
-
-
- def randvals(rng, shape, dtype):
-     if dtype in np_float_types:
-         return rng.standard_normal(size=shape).astype(dtype)
-     elif dtype in [np.int8, np.uint8, np.byte, np.ubyte]:
-         return rng.integers(1, high=3, size=shape, dtype=dtype)
-     return rng.integers(1, high=5, size=shape, dtype=dtype)
-
-
- kernel_cache = dict()
-
-
- def getkernel(func, suffix=""):
-     key = func.__name__ + "_" + suffix
-     if key not in kernel_cache:
-         kernel_cache[key] = wp.Kernel(func=func, key=key)
-     return kernel_cache[key]
-
-
- def get_select_kernel(dtype):
-     def output_select_kernel_fn(
-         input: wp.array(dtype=dtype),
-         index: int,
-         out: wp.array(dtype=dtype),
-     ):
-         out[0] = input[index]
-
-     return getkernel(output_select_kernel_fn, suffix=dtype.__name__)
-
-
- def get_select_kernel2(dtype):
-     def output_select_kernel2_fn(
-         input: wp.array(dtype=dtype, ndim=2),
-         index0: int,
-         index1: int,
-         out: wp.array(dtype=dtype),
-     ):
-         out[0] = input[index0, index1]
-
-     return getkernel(output_select_kernel2_fn, suffix=dtype.__name__)
-
-
- def test_arrays(test, device, dtype):
-     rng = np.random.default_rng(123)
-
-     tol = {
-         np.float16: 1.0e-3,
-         np.float32: 1.0e-6,
-         np.float64: 1.0e-8,
-     }.get(dtype, 0)
-
-     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
-     arr_np = randvals(rng, (10, 5), dtype)
-     arr = wp.array(arr_np, dtype=wptype, requires_grad=True, device=device)
-
-     assert_np_equal(arr.numpy(), arr_np, tol=tol)
-
-
- def test_unary_ops(test, device, dtype, register_kernels=False):
-     rng = np.random.default_rng(123)
-
-     tol = {
-         np.float16: 5.0e-3,
-         np.float32: 1.0e-6,
-         np.float64: 1.0e-8,
-     }.get(dtype, 0)
-
-     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
-
-     def check_unary(
-         inputs: wp.array(dtype=wptype, ndim=2),
-         outputs: wp.array(dtype=wptype, ndim=2),
-     ):
-         for i in range(10):
-             i0 = inputs[0, i]
-             i1 = inputs[1, i]
-             i2 = inputs[2, i]
-             i3 = inputs[3, i]
-             i4 = inputs[4, i]
-
-             # multiply outputs by 2 so we've got something to backpropagate:
-             outputs[0, i] = wptype(2.0) * (+i0)
-             outputs[1, i] = wptype(2.0) * (-i1)
-             outputs[2, i] = wptype(2.0) * wp.sign(i2)
-             outputs[3, i] = wptype(2.0) * wp.abs(i3)
-             outputs[4, i] = wptype(2.0) * wp.step(i4)
-
-     kernel = getkernel(check_unary, suffix=dtype.__name__)
-     output_select_kernel = get_select_kernel2(wptype)
-
-     if register_kernels:
-         return
-
-     if dtype in np_float_types:
-         inputs = wp.array(
-             rng.standard_normal(size=(5, 10)).astype(dtype), dtype=wptype, requires_grad=True, device=device
-         )
-     else:
-         inputs = wp.array(
-             rng.integers(-2, high=3, size=(5, 10), dtype=dtype), dtype=wptype, requires_grad=True, device=device
-         )
-     outputs = wp.zeros_like(inputs)
-
-     wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-     assert_np_equal(outputs.numpy()[0], 2 * inputs.numpy()[0], tol=tol)
-     assert_np_equal(outputs.numpy()[1], -2 * inputs.numpy()[1], tol=tol)
-     expected = 2 * np.sign(inputs.numpy()[2])
-     expected[expected == 0] = 2
-     assert_np_equal(outputs.numpy()[2], expected, tol=tol)
-     assert_np_equal(outputs.numpy()[3], 2 * np.abs(inputs.numpy()[3]), tol=tol)
-     assert_np_equal(outputs.numpy()[4], 2 * (1 - np.heaviside(inputs.numpy()[4], 1)), tol=tol)
-
-     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
-     if dtype in np_float_types:
-         for i in range(10):
-             # grad of 2x:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected_grads = np.zeros_like(inputs.numpy())
-             expected_grads[0, i] = 2
-             assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
-             tape.zero()
-
-             # grad of -2x:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected_grads = np.zeros_like(inputs.numpy())
-             expected_grads[1, i] = -2
-             assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
-             tape.zero()
-
-             # grad of 2 * sign(x):
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 2, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected_grads = np.zeros_like(inputs.numpy())
-             assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
-             tape.zero()
-
-             # grad of 2 * abs(x):
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 3, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected_grads = np.zeros_like(inputs.numpy())
-             expected_grads[3, i] = 2 * np.sign(inputs.numpy()[3, i])
-             assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
-             tape.zero()
-
-             # grad of 2 * step(x):
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 4, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected_grads = np.zeros_like(inputs.numpy())
-             assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
-             tape.zero()
-
-
- def test_nonzero(test, device, dtype, register_kernels=False):
-     rng = np.random.default_rng(123)
-
-     tol = {
-         np.float16: 5.0e-3,
-         np.float32: 1.0e-6,
-         np.float64: 1.0e-8,
-     }.get(dtype, 0)
-
-     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
-
-     def check_nonzero(
-         inputs: wp.array(dtype=wptype),
-         outputs: wp.array(dtype=wptype),
-     ):
-         for i in range(10):
-             i0 = inputs[i]
-             outputs[i] = wp.nonzero(i0)
-
-     kernel = getkernel(check_nonzero, suffix=dtype.__name__)
-     output_select_kernel = get_select_kernel(wptype)
-
-     if register_kernels:
-         return
-
-     inputs = wp.array(rng.integers(-2, high=3, size=10).astype(dtype), dtype=wptype, requires_grad=True, device=device)
-     outputs = wp.zeros_like(inputs)
-
-     wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-     assert_np_equal(outputs.numpy(), (inputs.numpy() != 0))
-
-     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
-     if dtype in np_float_types:
-         for i in range(10):
-             # grad should just be zero:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected_grads = np.zeros_like(inputs.numpy())
-             assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
-             tape.zero()
-
-
- def test_binary_ops(test, device, dtype, register_kernels=False):
-     rng = np.random.default_rng(123)
-
-     tol = {
-         np.float16: 5.0e-2,
-         np.float32: 1.0e-6,
-         np.float64: 1.0e-8,
-     }.get(dtype, 0)
-
-     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
-
-     def check_binary_ops(
-         in1: wp.array(dtype=wptype, ndim=2),
-         in2: wp.array(dtype=wptype, ndim=2),
-         outputs: wp.array(dtype=wptype, ndim=2),
-     ):
-         for i in range(10):
-             i0 = in1[0, i]
-             i1 = in1[1, i]
-             i2 = in1[2, i]
-             i3 = in1[3, i]
-             i4 = in1[4, i]
-             i5 = in1[5, i]
-             i6 = in1[6, i]
-             i7 = in1[7, i]
-
-             j0 = in2[0, i]
-             j1 = in2[1, i]
-             j2 = in2[2, i]
-             j3 = in2[3, i]
-             j4 = in2[4, i]
-             j5 = in2[5, i]
-             j6 = in2[6, i]
-             j7 = in2[7, i]
-
-             outputs[0, i] = wptype(2) * wp.mul(i0, j0)
-             outputs[1, i] = wptype(2) * wp.div(i1, j1)
-             outputs[2, i] = wptype(2) * wp.add(i2, j2)
-             outputs[3, i] = wptype(2) * wp.sub(i3, j3)
-             outputs[4, i] = wptype(2) * wp.mod(i4, j4)
-             outputs[5, i] = wptype(2) * wp.min(i5, j5)
-             outputs[6, i] = wptype(2) * wp.max(i6, j6)
-             outputs[7, i] = wptype(2) * wp.floordiv(i7, j7)
-
-     kernel = getkernel(check_binary_ops, suffix=dtype.__name__)
-     output_select_kernel = get_select_kernel2(wptype)
-
-     if register_kernels:
-         return
-
-     vals1 = randvals(rng, [8, 10], dtype)
-     if dtype in [np_unsigned_int_types]:
-         vals2 = vals1 + randvals(rng, [8, 10], dtype)
-     else:
-         vals2 = np.abs(randvals(rng, [8, 10], dtype))
-
-     in1 = wp.array(vals1, dtype=wptype, requires_grad=True, device=device)
-     in2 = wp.array(vals2, dtype=wptype, requires_grad=True, device=device)
-
-     outputs = wp.zeros_like(in1)
-
-     wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
-
-     assert_np_equal(outputs.numpy()[0], 2 * in1.numpy()[0] * in2.numpy()[0], tol=tol)
-     if dtype in np_float_types:
-         assert_np_equal(outputs.numpy()[1], 2 * in1.numpy()[1] / (in2.numpy()[1]), tol=tol)
-     else:
-         assert_np_equal(outputs.numpy()[1], 2 * (in1.numpy()[1] // (in2.numpy()[1])), tol=tol)
-     assert_np_equal(outputs.numpy()[2], 2 * (in1.numpy()[2] + (in2.numpy()[2])), tol=tol)
-     assert_np_equal(outputs.numpy()[3], 2 * (in1.numpy()[3] - (in2.numpy()[3])), tol=tol)
-
-     # ...so this is actually the desired behaviour right? Looks like wp.mod doesn't behave like
-     # python's % operator or np.mod()...
-     assert_np_equal(
-         outputs.numpy()[4],
-         2
-         * (
-             (in1.numpy()[4])
-             - (in2.numpy()[4]) * np.sign(in1.numpy()[4]) * np.floor(np.abs(in1.numpy()[4]) / (in2.numpy()[4]))
-         ),
-         tol=tol,
-     )
-
-     assert_np_equal(outputs.numpy()[5], 2 * np.minimum(in1.numpy()[5], in2.numpy()[5]), tol=tol)
-     assert_np_equal(outputs.numpy()[6], 2 * np.maximum(in1.numpy()[6], in2.numpy()[6]), tol=tol)
-     assert_np_equal(outputs.numpy()[7], 2 * np.floor_divide(in1.numpy()[7], in2.numpy()[7]), tol=tol)
-
-     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
-     if dtype in np_float_types:
-         for i in range(10):
-             # multiplication:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(in1.numpy())
-             expected[0, i] = 2.0 * in2.numpy()[0, i]
-             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
-             expected[0, i] = 2.0 * in1.numpy()[0, i]
-             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # division:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(in1.numpy())
-             expected[1, i] = 2.0 / (in2.numpy()[1, i])
-             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
-             # y = x1/x2
-             # dy/dx2 = -x1/x2^2
-             expected[1, i] = (-2.0) * (in1.numpy()[1, i] / (in2.numpy()[1, i] ** 2))
-             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # addition:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 2, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(in1.numpy())
-             expected[2, i] = 2.0
-             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
-             expected[2, i] = 2.0
-             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # subtraction:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 3, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(in1.numpy())
-             expected[3, i] = 2.0
-             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
-             expected[3, i] = -2.0
-             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # modulus. unless at discontinuities,
-             # d/dx1( x1 % x2 ) == 1
-             # d/dx2( x1 % x2 ) == 0
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 4, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(in1.numpy())
-             expected[4, i] = 2.0
-             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
-             expected[4, i] = 0.0
-             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # min
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 5, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(in1.numpy())
-             expected[5, i] = 2.0 if (in1.numpy()[5, i] < in2.numpy()[5, i]) else 0.0
-             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
-             expected[5, i] = 2.0 if (in2.numpy()[5, i] < in1.numpy()[5, i]) else 0.0
-             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # max
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 6, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(in1.numpy())
-             expected[6, i] = 2.0 if (in1.numpy()[6, i] > in2.numpy()[6, i]) else 0.0
-             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
-             expected[6, i] = 2.0 if (in2.numpy()[6, i] > in1.numpy()[6, i]) else 0.0
-             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # floor_divide. Returns integers so gradient is zero
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 7, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(in1.numpy())
-             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
-             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-             tape.zero()
-
-
- def test_special_funcs(test, device, dtype, register_kernels=False):
-     rng = np.random.default_rng(123)
-
-     tol = {
-         np.float16: 1.0e-2,
-         np.float32: 1.0e-6,
-         np.float64: 1.0e-8,
-     }.get(dtype, 0)
-
-     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
-
-     def check_special_funcs(
-         inputs: wp.array(dtype=wptype, ndim=2),
-         outputs: wp.array(dtype=wptype, ndim=2),
-     ):
-         # multiply outputs by 2 so we've got something to backpropagate:
-         for i in range(10):
-             outputs[0, i] = wptype(2) * wp.log(inputs[0, i])
-             outputs[1, i] = wptype(2) * wp.log2(inputs[1, i])
-             outputs[2, i] = wptype(2) * wp.log10(inputs[2, i])
-             outputs[3, i] = wptype(2) * wp.exp(inputs[3, i])
-             outputs[4, i] = wptype(2) * wp.atan(inputs[4, i])
-             outputs[5, i] = wptype(2) * wp.sin(inputs[5, i])
-             outputs[6, i] = wptype(2) * wp.cos(inputs[6, i])
-             outputs[7, i] = wptype(2) * wp.sqrt(inputs[7, i])
-             outputs[8, i] = wptype(2) * wp.tan(inputs[8, i])
-             outputs[9, i] = wptype(2) * wp.sinh(inputs[9, i])
-             outputs[10, i] = wptype(2) * wp.cosh(inputs[10, i])
-             outputs[11, i] = wptype(2) * wp.tanh(inputs[11, i])
-             outputs[12, i] = wptype(2) * wp.acos(inputs[12, i])
-             outputs[13, i] = wptype(2) * wp.asin(inputs[13, i])
-             outputs[14, i] = wptype(2) * wp.cbrt(inputs[14, i])
-
-     kernel = getkernel(check_special_funcs, suffix=dtype.__name__)
-     output_select_kernel = get_select_kernel2(wptype)
-
-     if register_kernels:
-         return
-
-     invals = rng.normal(size=(15, 10)).astype(dtype)
-     invals[[0, 1, 2, 7, 14]] = 0.1 + np.abs(invals[[0, 1, 2, 7, 14]])
-     invals[12] = np.clip(invals[12], -0.9, 0.9)
-     invals[13] = np.clip(invals[13], -0.9, 0.9)
-     inputs = wp.array(invals, dtype=wptype, requires_grad=True, device=device)
-     outputs = wp.zeros_like(inputs)
-
-     wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-
-     assert_np_equal(outputs.numpy()[0], 2 * np.log(inputs.numpy()[0]), tol=tol)
-     assert_np_equal(outputs.numpy()[1], 2 * np.log2(inputs.numpy()[1]), tol=tol)
-     assert_np_equal(outputs.numpy()[2], 2 * np.log10(inputs.numpy()[2]), tol=tol)
-     assert_np_equal(outputs.numpy()[3], 2 * np.exp(inputs.numpy()[3]), tol=tol)
-     assert_np_equal(outputs.numpy()[4], 2 * np.arctan(inputs.numpy()[4]), tol=tol)
-     assert_np_equal(outputs.numpy()[5], 2 * np.sin(inputs.numpy()[5]), tol=tol)
-     assert_np_equal(outputs.numpy()[6], 2 * np.cos(inputs.numpy()[6]), tol=tol)
-     assert_np_equal(outputs.numpy()[7], 2 * np.sqrt(inputs.numpy()[7]), tol=tol)
-     assert_np_equal(outputs.numpy()[8], 2 * np.tan(inputs.numpy()[8]), tol=tol)
-     assert_np_equal(outputs.numpy()[9], 2 * np.sinh(inputs.numpy()[9]), tol=tol)
-     assert_np_equal(outputs.numpy()[10], 2 * np.cosh(inputs.numpy()[10]), tol=tol)
-     assert_np_equal(outputs.numpy()[11], 2 * np.tanh(inputs.numpy()[11]), tol=tol)
-     assert_np_equal(outputs.numpy()[12], 2 * np.arccos(inputs.numpy()[12]), tol=tol)
-     assert_np_equal(outputs.numpy()[13], 2 * np.arcsin(inputs.numpy()[13]), tol=tol)
-     assert_np_equal(outputs.numpy()[14], 2 * np.cbrt(inputs.numpy()[14]), tol=tol)
-
-     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
-     if dtype in np_float_types:
-         for i in range(10):
-             # log:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(inputs.numpy())
-             expected[0, i] = 2.0 / inputs.numpy()[0, i]
-             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # log2:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(inputs.numpy())
-             expected[1, i] = 2.0 / (inputs.numpy()[1, i] * np.log(2.0))
-             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # log10:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 2, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(inputs.numpy())
-             expected[2, i] = 2.0 / (inputs.numpy()[2, i] * np.log(10.0))
-             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # exp:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 3, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(inputs.numpy())
-             expected[3, i] = outputs.numpy()[3, i]
-             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # arctan:
-             # looks like the autodiff formula in warp was wrong? Was (1 + x^2) rather than
-             # 1/(1 + x^2)
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 4, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(inputs.numpy())
-             expected[4, i] = 2.0 / (inputs.numpy()[4, i] ** 2 + 1)
-             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # sin:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 5, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(inputs.numpy())
-             expected[5, i] = np.cos(inputs.numpy()[5, i]) * 2
-             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # cos:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 6, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(inputs.numpy())
-             expected[6, i] = -np.sin(inputs.numpy()[6, i]) * 2.0
-             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # sqrt:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 7, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(inputs.numpy())
-             expected[7, i] = 1.0 / (np.sqrt(inputs.numpy()[7, i]))
-             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # tan:
-             # looks like there was a bug in autodiff formula here too - gradient was zero if cos(x) > 0
-             # (should have been "if(cosx != 0)")
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 8, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(inputs.numpy())
-             expected[8, i] = 2.0 / (np.cos(inputs.numpy()[8, i]) ** 2)
-             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=200 * tol)
-             tape.zero()
-
-             # sinh:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 9, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(inputs.numpy())
-             expected[9, i] = 2.0 * np.cosh(inputs.numpy()[9, i])
-             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # cosh:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 10, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(inputs.numpy())
-             expected[10, i] = 2.0 * np.sinh(inputs.numpy()[10, i])
-             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # tanh:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 11, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(inputs.numpy())
-             expected[11, i] = 2.0 / (np.cosh(inputs.numpy()[11, i]) ** 2)
-             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # arccos:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 12, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(inputs.numpy())
-             expected[12, i] = -2.0 / np.sqrt(1 - inputs.numpy()[12, i] ** 2)
-             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # arcsin:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 13, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(inputs.numpy())
-             expected[13, i] = 2.0 / np.sqrt(1 - inputs.numpy()[13, i] ** 2)
-             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=6 * tol)
-             tape.zero()
-
-             # cbrt:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 14, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(inputs.numpy())
-             cbrt = np.cbrt(inputs.numpy()[14, i], dtype=np.dtype(dtype))
-             expected[14, i] = (2.0 / 3.0) * (1.0 / (cbrt * cbrt))
-             assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
-             tape.zero()
-
-
- def test_special_funcs_2arg(test, device, dtype, register_kernels=False):
-     rng = np.random.default_rng(123)
-
-     tol = {
-         np.float16: 1.0e-2,
-         np.float32: 1.0e-6,
-         np.float64: 1.0e-8,
-     }.get(dtype, 0)
-
-     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
-
-     def check_special_funcs_2arg(
-         in1: wp.array(dtype=wptype, ndim=2),
-         in2: wp.array(dtype=wptype, ndim=2),
-         outputs: wp.array(dtype=wptype, ndim=2),
-     ):
-         # multiply outputs by 2 so we've got something to backpropagate:
-         for i in range(10):
-             outputs[0, i] = wptype(2) * wp.pow(in1[0, i], in2[0, i])
-             outputs[1, i] = wptype(2) * wp.atan2(in1[1, i], in2[1, i])
-
-     kernel = getkernel(check_special_funcs_2arg, suffix=dtype.__name__)
-     output_select_kernel = get_select_kernel2(wptype)
-
-     if register_kernels:
-         return
-
-     in1 = wp.array(np.abs(randvals(rng, [2, 10], dtype)), dtype=wptype, requires_grad=True, device=device)
-     in2 = wp.array(randvals(rng, [2, 10], dtype), dtype=wptype, requires_grad=True, device=device)
-     outputs = wp.zeros_like(in1)
-
-     wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
-
-     assert_np_equal(outputs.numpy()[0], 2.0 * np.power(in1.numpy()[0], in2.numpy()[0]), tol=tol)
-     assert_np_equal(outputs.numpy()[1], 2.0 * np.arctan2(in1.numpy()[1], in2.numpy()[1]), tol=tol)
-
-     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
-     if dtype in np_float_types:
-         for i in range(10):
-             # pow:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
-             tape.backward(loss=out)
-             expected = np.zeros_like(in1.numpy())
-             expected[0, i] = 2.0 * in2.numpy()[0, i] * np.power(in1.numpy()[0, i], in2.numpy()[0, i] - 1)
-             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=5 * tol)
-             expected[0, i] = 2.0 * np.power(in1.numpy()[0, i], in2.numpy()[0, i]) * np.log(in1.numpy()[0, i])
-             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-             tape.zero()
-
-             # atan2:
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             expected = np.zeros_like(in1.numpy())
-             expected[1, i] = 2.0 * in2.numpy()[1, i] / (in1.numpy()[1, i] ** 2 + in2.numpy()[1, i] ** 2)
-             assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
-             expected[1, i] = -2.0 * in1.numpy()[1, i] / (in1.numpy()[1, i] ** 2 + in2.numpy()[1, i] ** 2)
-             assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-             tape.zero()
-
-
- def test_float_to_int(test, device, dtype, register_kernels=False):
-     rng = np.random.default_rng(123)
-
-     tol = {
-         np.float16: 5.0e-3,
-         np.float32: 1.0e-6,
-         np.float64: 1.0e-8,
-     }.get(dtype, 0)
-
-     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
-
-     def check_float_to_int(
-         inputs: wp.array(dtype=wptype, ndim=2),
-         outputs: wp.array(dtype=wptype, ndim=2),
-     ):
-         for i in range(10):
-             outputs[0, i] = wp.round(inputs[0, i])
-             outputs[1, i] = wp.rint(inputs[1, i])
-             outputs[2, i] = wp.trunc(inputs[2, i])
-             outputs[3, i] = wp.floor(inputs[3, i])
-             outputs[4, i] = wp.ceil(inputs[4, i])
-             outputs[5, i] = wp.frac(inputs[5, i])
-
-     kernel = getkernel(check_float_to_int, suffix=dtype.__name__)
-     output_select_kernel = get_select_kernel2(wptype)
-
-     if register_kernels:
-         return
-
-     inputs = wp.array(rng.standard_normal(size=(6, 10)).astype(dtype), dtype=wptype, requires_grad=True, device=device)
-     outputs = wp.zeros_like(inputs)
-
-     wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-
-     assert_np_equal(outputs.numpy()[0], np.round(inputs.numpy()[0]))
-     assert_np_equal(outputs.numpy()[1], np.rint(inputs.numpy()[1]))
-     assert_np_equal(outputs.numpy()[2], np.trunc(inputs.numpy()[2]))
-     assert_np_equal(outputs.numpy()[3], np.floor(inputs.numpy()[3]))
-     assert_np_equal(outputs.numpy()[4], np.ceil(inputs.numpy()[4]))
-     assert_np_equal(outputs.numpy()[5], np.modf(inputs.numpy()[5])[0])
-
-     # all the gradients should be zero as these functions are piecewise constant:
-
-     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
-     for i in range(10):
-         for j in range(5):
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, j, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             assert_np_equal(tape.gradients[inputs].numpy(), np.zeros_like(inputs.numpy()), tol=tol)
-             tape.zero()
-
-
- def test_infinity(test, device, dtype, register_kernels=False):
-     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
-
-     def check_infinity(
-         outputs: wp.array(dtype=wptype),
-     ):
-         outputs[0] = wptype(wp.inf)
-         outputs[1] = wptype(-wp.inf)
-         outputs[2] = wptype(2.0 * wp.inf)
-         outputs[3] = wptype(-2.0 * wp.inf)
-         outputs[4] = wptype(2.0 / 0.0)
-         outputs[5] = wptype(-2.0 / 0.0)
-
-     kernel = getkernel(check_infinity, suffix=dtype.__name__)
-
-     if register_kernels:
-         return
-
-     outputs = wp.zeros(6, dtype=wptype, device=device)
-
-     wp.launch(kernel, dim=1, inputs=[], outputs=[outputs], device=device)
-
-     test.assertEqual(outputs.numpy()[0], math.inf)
-     test.assertEqual(outputs.numpy()[1], -math.inf)
-     test.assertEqual(outputs.numpy()[2], math.inf)
-     test.assertEqual(outputs.numpy()[3], -math.inf)
-     test.assertEqual(outputs.numpy()[4], math.inf)
-     test.assertEqual(outputs.numpy()[5], -math.inf)
-
-
- def test_interp(test, device, dtype, register_kernels=False):
-     rng = np.random.default_rng(123)
-
-     tol = {
-         np.float16: 1.0e-2,
-         np.float32: 5.0e-6,
-         np.float64: 1.0e-8,
-     }.get(dtype, 0)
-
-     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
-
-     def check_interp(
-         in1: wp.array(dtype=wptype, ndim=2),
-         in2: wp.array(dtype=wptype, ndim=2),
-         in3: wp.array(dtype=wptype, ndim=2),
-         outputs: wp.array(dtype=wptype, ndim=2),
-     ):
-         # multiply outputs by 2 so we've got something to backpropagate:
-         for i in range(10):
-             outputs[0, i] = wptype(2) * wp.smoothstep(in1[0, i], in2[0, i], in3[0, i])
-             outputs[1, i] = wptype(2) * wp.lerp(in1[1, i], in2[1, i], in3[1, i])
-
-     kernel = getkernel(check_interp, suffix=dtype.__name__)
-     output_select_kernel = get_select_kernel2(wptype)
-
-     if register_kernels:
-         return
-
-     e0 = randvals(rng, [2, 10], dtype)
-     e1 = e0 + randvals(rng, [2, 10], dtype) + 0.1
-     in1 = wp.array(e0, dtype=wptype, requires_grad=True, device=device)
-     in2 = wp.array(e1, dtype=wptype, requires_grad=True, device=device)
-     in3 = wp.array(randvals(rng, [2, 10], dtype), dtype=wptype, requires_grad=True, device=device)
-
-     outputs = wp.zeros_like(in1)
-
-     wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
-
-     edge0 = in1.numpy()[0]
-     edge1 = in2.numpy()[0]
-     t_smoothstep = in3.numpy()[0]
-     x = np.clip((t_smoothstep - edge0) / (edge1 - edge0), 0, 1)
-     smoothstep_expected = 2.0 * x * x * (3 - 2 * x)
-
-     assert_np_equal(outputs.numpy()[0], smoothstep_expected, tol=tol)
-
-     a = in1.numpy()[1]
-     b = in2.numpy()[1]
-     t = in3.numpy()[1]
-     assert_np_equal(outputs.numpy()[1], 2.0 * (a * (1 - t) + b * t), tol=tol)
-
-     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
-     if dtype in np_float_types:
-         for i in range(10):
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
-             tape.backward(loss=out)
-
-             # e0 = in1
-             # e1 = in2
-             # t = in3
-
-             # x = clamp((t - e0) / (e1 - e0), 0,1)
-             # dx/dt = 1 / (e1 - e0) if e0 < t < e1 else 0
-
-             # y = x * x * (3 - 2 * x)
-
-             # y = 3 * x * x - 2 * x * x * x
-             # dy/dx = 6 * ( x - x^2 )
-             dydx = 6 * x * (1 - x)
-
-             # dy/in1 = dy/dx dx/de0 de0/din1
-             dxde0 = (t_smoothstep - edge1) / ((edge1 - edge0) ** 2)
-             dxde0[x == 0] = 0
-             dxde0[x == 1] = 0
-
-             expected_grads = np.zeros_like(in1.numpy())
-             expected_grads[0, i] = 2.0 * dydx[i] * dxde0[i]
-             assert_np_equal(tape.gradients[in1].numpy(), expected_grads, tol=tol)
-
-             # dy/in2 = dy/dx dx/de1 de1/din2
-             dxde1 = (edge0 - t_smoothstep) / ((edge1 - edge0) ** 2)
-             dxde1[x == 0] = 0
-             dxde1[x == 1] = 0
-
-             expected_grads = np.zeros_like(in1.numpy())
-             expected_grads[0, i] = 2.0 * dydx[i] * dxde1[i]
-             assert_np_equal(tape.gradients[in2].numpy(), expected_grads, tol=tol)
-
-             # dy/in3 = dy/dx dx/dt dt/din3
-             dxdt = 1.0 / (edge1 - edge0)
-             dxdt[x == 0] = 0
-             dxdt[x == 1] = 0
-
-             expected_grads = np.zeros_like(in1.numpy())
-             expected_grads[0, i] = 2.0 * dydx[i] * dxdt[i]
-             assert_np_equal(tape.gradients[in3].numpy(), expected_grads, tol=tol)
-             tape.zero()
-
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
-             tape.backward(loss=out)
-
-             # y = a*(1-t) + b*t
-             # a = in1
-             # b = in2
-             # t = in3
-
-             # y = in1*( 1 - in3 ) + in2*in3
-
-             # dy/din1 = (1-in3)
-             expected_grads = np.zeros_like(in1.numpy())
-             expected_grads[1, i] = 2.0 * (1 - in3.numpy()[1, i])
-             assert_np_equal(tape.gradients[in1].numpy(), expected_grads, tol=tol)
-
-             # dy/din2 = in3
-             expected_grads = np.zeros_like(in1.numpy())
-             expected_grads[1, i] = 2.0 * in3.numpy()[1, i]
-             assert_np_equal(tape.gradients[in2].numpy(), expected_grads, tol=tol)
-
-             # dy/din3 = 8*in2 - 1.5*4*in1
-             expected_grads = np.zeros_like(in1.numpy())
-             expected_grads[1, i] = 2.0 * (in2.numpy()[1, i] - in1.numpy()[1, i])
-             assert_np_equal(tape.gradients[in3].numpy(), expected_grads, tol=tol)
-             tape.zero()
-
-
- def test_clamp(test, device, dtype, register_kernels=False):
-     rng = np.random.default_rng(123)
-
-     tol = {
-         np.float16: 5.0e-3,
-         np.float32: 1.0e-6,
-         np.float64: 1.0e-6,
-     }.get(dtype, 0)
-
-     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
-
-     def check_clamp(
-         in1: wp.array(dtype=wptype),
-         in2: wp.array(dtype=wptype),
-         in3: wp.array(dtype=wptype),
-         outputs: wp.array(dtype=wptype),
-     ):
-         for i in range(100):
-             # multiply output by 2 so we've got something to backpropagate:
-             outputs[i] = wptype(2) * wp.clamp(in1[i], in2[i], in3[i])
-
-     kernel = getkernel(check_clamp, suffix=dtype.__name__)
-     output_select_kernel = get_select_kernel(wptype)
-
-     if register_kernels:
-         return
-
-     in1 = wp.array(randvals(rng, [100], dtype), dtype=wptype, requires_grad=True, device=device)
-     starts = randvals(rng, [100], dtype)
-     diffs = np.abs(randvals(rng, [100], dtype))
-     in2 = wp.array(starts, dtype=wptype, requires_grad=True, device=device)
-     in3 = wp.array(starts + diffs, dtype=wptype, requires_grad=True, device=device)
-     outputs = wp.zeros_like(in1)
-
-     wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
-
-     assert_np_equal(2 * np.clip(in1.numpy(), in2.numpy(), in3.numpy()), outputs.numpy(), tol=tol)
-
-     out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
-     if dtype in np_float_types:
-         for i in range(100):
-             tape = wp.Tape()
-             with tape:
-                 wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
-                 wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[out], device=device)
-
-             tape.backward(loss=out)
-             t = in1.numpy()[i]
-             lower = in2.numpy()[i]
-             upper = in3.numpy()[i]
-             expected = np.zeros_like(in1.numpy())
-             if t < lower:
-                 expected[i] = 2.0
-                 assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-                 expected[i] = 0.0
-                 assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
-                 assert_np_equal(tape.gradients[in3].numpy(), expected, tol=tol)
-             elif t > upper:
-                 expected[i] = 2.0
-                 assert_np_equal(tape.gradients[in3].numpy(), expected, tol=tol)
-                 expected[i] = 0.0
-                 assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
-                 assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-             else:
-                 expected[i] = 2.0
-                 assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
-                 expected[i] = 0.0
-                 assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
-                 assert_np_equal(tape.gradients[in3].numpy(), expected, tol=tol)
-
-             tape.zero()
-
-
- devices = get_test_devices()
-
-
- class TestArithmetic(unittest.TestCase):
-     pass
-
-
- # these unary ops only make sense for signed values:
- for dtype in np_signed_int_types + np_float_types:
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_unary_ops_{dtype.__name__}", test_unary_ops, devices=devices, dtype=dtype
-     )
-
- for dtype in np_float_types:
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_special_funcs_{dtype.__name__}", test_special_funcs, devices=devices, dtype=dtype
-     )
-     add_function_test_register_kernel(
-         TestArithmetic,
-         f"test_special_funcs_2arg_{dtype.__name__}",
-         test_special_funcs_2arg,
-         devices=devices,
-         dtype=dtype,
-     )
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_interp_{dtype.__name__}", test_interp, devices=devices, dtype=dtype
-     )
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_float_to_int_{dtype.__name__}", test_float_to_int, devices=devices, dtype=dtype
-     )
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_infinity_{dtype.__name__}", test_infinity, devices=devices, dtype=dtype
-     )
-
- for dtype in np_scalar_types:
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_clamp_{dtype.__name__}", test_clamp, devices=devices, dtype=dtype
-     )
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_nonzero_{dtype.__name__}", test_nonzero, devices=devices, dtype=dtype
-     )
-     add_function_test(TestArithmetic, f"test_arrays_{dtype.__name__}", test_arrays, devices=devices, dtype=dtype)
-     add_function_test_register_kernel(
-         TestArithmetic, f"test_binary_ops_{dtype.__name__}", test_binary_ops, devices=devices, dtype=dtype
-     )
-
-
- if __name__ == "__main__":
-     wp.build.clear_kernel_cache()
-     unittest.main(verbosity=2, failfast=False)
+ # Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
3
+ # and proprietary rights in and to this software, related documentation
4
+ # and any modifications thereto. Any use, reproduction, disclosure or
5
+ # distribution of this software and related documentation without an express
6
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
+
8
+ import unittest
9
+
10
+ import numpy as np
11
+
12
+ import warp as wp
13
+ from warp.tests.unittest_utils import *
14
+
15
+ np_signed_int_types = [
16
+ np.int8,
17
+ np.int16,
18
+ np.int32,
19
+ np.int64,
20
+ np.byte,
21
+ ]
22
+
23
+ np_unsigned_int_types = [
24
+ np.uint8,
25
+ np.uint16,
26
+ np.uint32,
27
+ np.uint64,
28
+ np.ubyte,
29
+ ]
30
+
31
+ np_int_types = np_signed_int_types + np_unsigned_int_types
32
+
33
+ np_float_types = [np.float16, np.float32, np.float64]
34
+
35
+ np_scalar_types = np_int_types + np_float_types
36
+
37
+
38
+ def randvals(rng, shape, dtype):
39
+ if dtype in np_float_types:
40
+ return rng.standard_normal(size=shape).astype(dtype)
41
+ elif dtype in [np.int8, np.uint8, np.byte, np.ubyte]:
42
+ return rng.integers(1, high=3, size=shape, dtype=dtype)
43
+ return rng.integers(1, high=5, size=shape, dtype=dtype)
44
+
45
+
46
+ kernel_cache = {}
47
+
48
+
49
+ def getkernel(func, suffix=""):
50
+ key = func.__name__ + "_" + suffix
51
+ if key not in kernel_cache:
52
+ kernel_cache[key] = wp.Kernel(func=func, key=key)
53
+ return kernel_cache[key]
54
+
55
+
56
+ def get_select_kernel(dtype):
57
+ def output_select_kernel_fn(
58
+ input: wp.array(dtype=dtype),
59
+ index: int,
60
+ out: wp.array(dtype=dtype),
61
+ ):
62
+ out[0] = input[index]
63
+
64
+ return getkernel(output_select_kernel_fn, suffix=dtype.__name__)
65
+
66
+
67
+ def get_select_kernel2(dtype):
68
+ def output_select_kernel2_fn(
69
+ input: wp.array(dtype=dtype, ndim=2),
70
+ index0: int,
71
+ index1: int,
72
+ out: wp.array(dtype=dtype),
73
+ ):
74
+ out[0] = input[index0, index1]
75
+
76
+ return getkernel(output_select_kernel2_fn, suffix=dtype.__name__)
77
+
78
+
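The two select kernels above exist so that a single element of a larger output array can be copied into a one-element array and used as the scalar loss for wp.Tape.backward(). A minimal sketch of that pattern (assuming a working default Warp device; the kernel and array names here are illustrative, not part of the test suite):

    import numpy as np
    import warp as wp

    wp.init()

    # illustrative kernels, not part of the test suite
    @wp.kernel
    def square(xs: wp.array(dtype=float), ys: wp.array(dtype=float)):
        i = wp.tid()
        ys[i] = xs[i] * xs[i]

    @wp.kernel
    def select(ys: wp.array(dtype=float), index: int, out: wp.array(dtype=float)):
        out[0] = ys[index]

    xs = wp.array(np.array([1.0, 2.0, 3.0], dtype=np.float32), dtype=float, requires_grad=True)
    ys = wp.zeros_like(xs)
    out = wp.zeros(1, dtype=float, requires_grad=True)

    tape = wp.Tape()
    with tape:
        wp.launch(square, dim=3, inputs=[xs], outputs=[ys])
        wp.launch(select, dim=1, inputs=[ys, 1], outputs=[out])
    tape.backward(loss=out)

    # only the selected element receives a gradient: d(x^2)/dx = 2x at x = 2
    print(tape.gradients[xs].numpy())  # [0. 4. 0.]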
79
+ def test_arrays(test, device, dtype):
80
+ rng = np.random.default_rng(123)
81
+
82
+ tol = {
83
+ np.float16: 1.0e-3,
84
+ np.float32: 1.0e-6,
85
+ np.float64: 1.0e-8,
86
+ }.get(dtype, 0)
87
+
88
+ wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
89
+ arr_np = randvals(rng, (10, 5), dtype)
90
+ arr = wp.array(arr_np, dtype=wptype, requires_grad=True, device=device)
91
+
92
+ assert_np_equal(arr.numpy(), arr_np, tol=tol)
93
+
94
+
95
+ def test_unary_ops(test, device, dtype, register_kernels=False):
96
+ rng = np.random.default_rng(123)
97
+
98
+ tol = {
99
+ np.float16: 5.0e-3,
100
+ np.float32: 1.0e-6,
101
+ np.float64: 1.0e-8,
102
+ }.get(dtype, 0)
103
+
104
+ wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
105
+
106
+ def check_unary(
107
+ inputs: wp.array(dtype=wptype, ndim=2),
108
+ outputs: wp.array(dtype=wptype, ndim=2),
109
+ ):
110
+ for i in range(10):
111
+ i0 = inputs[0, i]
112
+ i1 = inputs[1, i]
113
+ i2 = inputs[2, i]
114
+ i3 = inputs[3, i]
115
+ i4 = inputs[4, i]
116
+
117
+ # multiply outputs by 2 so we've got something to backpropagate:
118
+ outputs[0, i] = wptype(2.0) * (+i0)
119
+ outputs[1, i] = wptype(2.0) * (-i1)
120
+ outputs[2, i] = wptype(2.0) * wp.sign(i2)
121
+ outputs[3, i] = wptype(2.0) * wp.abs(i3)
122
+ outputs[4, i] = wptype(2.0) * wp.step(i4)
123
+
124
+ kernel = getkernel(check_unary, suffix=dtype.__name__)
125
+ output_select_kernel = get_select_kernel2(wptype)
126
+
127
+ if register_kernels:
128
+ return
129
+
130
+ if dtype in np_float_types:
131
+ inputs = wp.array(
132
+ rng.standard_normal(size=(5, 10)).astype(dtype), dtype=wptype, requires_grad=True, device=device
133
+ )
134
+ else:
135
+ inputs = wp.array(
136
+ rng.integers(-2, high=3, size=(5, 10), dtype=dtype), dtype=wptype, requires_grad=True, device=device
137
+ )
138
+ outputs = wp.zeros_like(inputs)
139
+
140
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
141
+ assert_np_equal(outputs.numpy()[0], 2 * inputs.numpy()[0], tol=tol)
142
+ assert_np_equal(outputs.numpy()[1], -2 * inputs.numpy()[1], tol=tol)
143
+ expected = 2 * np.sign(inputs.numpy()[2])
144
+ expected[expected == 0] = 2
145
+ assert_np_equal(outputs.numpy()[2], expected, tol=tol)
146
+ assert_np_equal(outputs.numpy()[3], 2 * np.abs(inputs.numpy()[3]), tol=tol)
147
+ assert_np_equal(outputs.numpy()[4], 2 * (1 - np.heaviside(inputs.numpy()[4], 1)), tol=tol)
148
+
149
+ out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
150
+ if dtype in np_float_types:
151
+ for i in range(10):
152
+ # grad of 2x:
153
+ tape = wp.Tape()
154
+ with tape:
155
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
156
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
157
+
158
+ tape.backward(loss=out)
159
+ expected_grads = np.zeros_like(inputs.numpy())
160
+ expected_grads[0, i] = 2
161
+ assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
162
+ tape.zero()
163
+
164
+ # grad of -2x:
165
+ tape = wp.Tape()
166
+ with tape:
167
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
168
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
169
+
170
+ tape.backward(loss=out)
171
+ expected_grads = np.zeros_like(inputs.numpy())
172
+ expected_grads[1, i] = -2
173
+ assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
174
+ tape.zero()
175
+
176
+ # grad of 2 * sign(x):
177
+ tape = wp.Tape()
178
+ with tape:
179
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
180
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 2, i], outputs=[out], device=device)
181
+
182
+ tape.backward(loss=out)
183
+ expected_grads = np.zeros_like(inputs.numpy())
184
+ assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
185
+ tape.zero()
186
+
187
+ # grad of 2 * abs(x):
188
+ tape = wp.Tape()
189
+ with tape:
190
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
191
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 3, i], outputs=[out], device=device)
192
+
193
+ tape.backward(loss=out)
194
+ expected_grads = np.zeros_like(inputs.numpy())
195
+ expected_grads[3, i] = 2 * np.sign(inputs.numpy()[3, i])
196
+ assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
197
+ tape.zero()
198
+
199
+ # grad of 2 * step(x):
200
+ tape = wp.Tape()
201
+ with tape:
202
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
203
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 4, i], outputs=[out], device=device)
204
+
205
+ tape.backward(loss=out)
206
+ expected_grads = np.zeros_like(inputs.numpy())
207
+ assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
208
+ tape.zero()
209
+
210
+
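The step() expectation in test_unary_ops encodes the convention step(x) = 1 for x < 0 and 0 otherwise; a quick sketch of the NumPy identity used there (np.heaviside's second argument is the value taken at x == 0):

    import numpy as np

    x = np.array([-2.0, 0.0, 3.0])
    print(1 - np.heaviside(x, 1))  # [1. 0. 0.] -- matches step(x) = 1 for x < 0, else 0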
211
+ def test_nonzero(test, device, dtype, register_kernels=False):
212
+ rng = np.random.default_rng(123)
213
+
214
+ tol = {
215
+ np.float16: 5.0e-3,
216
+ np.float32: 1.0e-6,
217
+ np.float64: 1.0e-8,
218
+ }.get(dtype, 0)
219
+
220
+ wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
221
+
222
+ def check_nonzero(
223
+ inputs: wp.array(dtype=wptype),
224
+ outputs: wp.array(dtype=wptype),
225
+ ):
226
+ for i in range(10):
227
+ i0 = inputs[i]
228
+ outputs[i] = wp.nonzero(i0)
229
+
230
+ kernel = getkernel(check_nonzero, suffix=dtype.__name__)
231
+ output_select_kernel = get_select_kernel(wptype)
232
+
233
+ if register_kernels:
234
+ return
235
+
236
+ inputs = wp.array(rng.integers(-2, high=3, size=10).astype(dtype), dtype=wptype, requires_grad=True, device=device)
237
+ outputs = wp.zeros_like(inputs)
238
+
239
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
240
+ assert_np_equal(outputs.numpy(), (inputs.numpy() != 0))
241
+
242
+ out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
243
+ if dtype in np_float_types:
244
+ for i in range(10):
245
+ # grad should just be zero:
246
+ tape = wp.Tape()
247
+ with tape:
248
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
249
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[out], device=device)
250
+
251
+ tape.backward(loss=out)
252
+ expected_grads = np.zeros_like(inputs.numpy())
253
+ assert_np_equal(tape.gradients[inputs].numpy(), expected_grads, tol=tol)
254
+ tape.zero()
255
+
256
+
257
+ def test_binary_ops(test, device, dtype, register_kernels=False):
258
+ rng = np.random.default_rng(123)
259
+
260
+ tol = {
261
+ np.float16: 5.0e-2,
262
+ np.float32: 1.0e-6,
263
+ np.float64: 1.0e-8,
264
+ }.get(dtype, 0)
265
+
266
+ wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
267
+
268
+ def check_binary_ops(
269
+ in1: wp.array(dtype=wptype, ndim=2),
270
+ in2: wp.array(dtype=wptype, ndim=2),
271
+ outputs: wp.array(dtype=wptype, ndim=2),
272
+ ):
273
+ for i in range(10):
274
+ i0 = in1[0, i]
275
+ i1 = in1[1, i]
276
+ i2 = in1[2, i]
277
+ i3 = in1[3, i]
278
+ i4 = in1[4, i]
279
+ i5 = in1[5, i]
280
+ i6 = in1[6, i]
281
+ i7 = in1[7, i]
282
+
283
+ j0 = in2[0, i]
284
+ j1 = in2[1, i]
285
+ j2 = in2[2, i]
286
+ j3 = in2[3, i]
287
+ j4 = in2[4, i]
288
+ j5 = in2[5, i]
289
+ j6 = in2[6, i]
290
+ j7 = in2[7, i]
291
+
292
+ outputs[0, i] = wptype(2) * wp.mul(i0, j0)
293
+ outputs[1, i] = wptype(2) * wp.div(i1, j1)
294
+ outputs[2, i] = wptype(2) * wp.add(i2, j2)
295
+ outputs[3, i] = wptype(2) * wp.sub(i3, j3)
296
+ outputs[4, i] = wptype(2) * wp.mod(i4, j4)
297
+ outputs[5, i] = wptype(2) * wp.min(i5, j5)
298
+ outputs[6, i] = wptype(2) * wp.max(i6, j6)
299
+ outputs[7, i] = wptype(2) * wp.floordiv(i7, j7)
300
+
301
+ kernel = getkernel(check_binary_ops, suffix=dtype.__name__)
302
+ output_select_kernel = get_select_kernel2(wptype)
303
+
304
+ if register_kernels:
305
+ return
306
+
307
+ vals1 = randvals(rng, [8, 10], dtype)
308
+ if dtype in np_unsigned_int_types:
309
+ vals2 = vals1 + randvals(rng, [8, 10], dtype)
310
+ else:
311
+ vals2 = np.abs(randvals(rng, [8, 10], dtype))
312
+
313
+ in1 = wp.array(vals1, dtype=wptype, requires_grad=True, device=device)
314
+ in2 = wp.array(vals2, dtype=wptype, requires_grad=True, device=device)
315
+
316
+ outputs = wp.zeros_like(in1)
317
+
318
+ wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
319
+
320
+ assert_np_equal(outputs.numpy()[0], 2 * in1.numpy()[0] * in2.numpy()[0], tol=tol)
321
+ if dtype in np_float_types:
322
+ assert_np_equal(outputs.numpy()[1], 2 * in1.numpy()[1] / (in2.numpy()[1]), tol=tol)
323
+ else:
324
+ assert_np_equal(outputs.numpy()[1], 2 * (in1.numpy()[1] // (in2.numpy()[1])), tol=tol)
325
+ assert_np_equal(outputs.numpy()[2], 2 * (in1.numpy()[2] + (in2.numpy()[2])), tol=tol)
326
+ assert_np_equal(outputs.numpy()[3], 2 * (in1.numpy()[3] - (in2.numpy()[3])), tol=tol)
327
+
328
+ # wp.mod() follows the C fmod() convention (the result takes the sign of the dividend),
330
+ # unlike Python's % operator or np.mod(), which floor -- see the sketch after this function:
330
+ assert_np_equal(
331
+ outputs.numpy()[4],
332
+ 2
333
+ * (
334
+ (in1.numpy()[4])
335
+ - (in2.numpy()[4]) * np.sign(in1.numpy()[4]) * np.floor(np.abs(in1.numpy()[4]) / (in2.numpy()[4]))
336
+ ),
337
+ tol=tol,
338
+ )
339
+
340
+ assert_np_equal(outputs.numpy()[5], 2 * np.minimum(in1.numpy()[5], in2.numpy()[5]), tol=tol)
341
+ assert_np_equal(outputs.numpy()[6], 2 * np.maximum(in1.numpy()[6], in2.numpy()[6]), tol=tol)
342
+ assert_np_equal(outputs.numpy()[7], 2 * np.floor_divide(in1.numpy()[7], in2.numpy()[7]), tol=tol)
343
+
344
+ out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
345
+ if dtype in np_float_types:
346
+ for i in range(10):
347
+ # multiplication:
348
+ tape = wp.Tape()
349
+ with tape:
350
+ wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
351
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
352
+
353
+ tape.backward(loss=out)
354
+ expected = np.zeros_like(in1.numpy())
355
+ expected[0, i] = 2.0 * in2.numpy()[0, i]
356
+ assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
357
+ expected[0, i] = 2.0 * in1.numpy()[0, i]
358
+ assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
359
+ tape.zero()
360
+
361
+ # division:
362
+ tape = wp.Tape()
363
+ with tape:
364
+ wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
365
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
366
+
367
+ tape.backward(loss=out)
368
+ expected = np.zeros_like(in1.numpy())
369
+ expected[1, i] = 2.0 / (in2.numpy()[1, i])
370
+ assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
371
+ # y = x1/x2
372
+ # dy/dx2 = -x1/x2^2
373
+ expected[1, i] = (-2.0) * (in1.numpy()[1, i] / (in2.numpy()[1, i] ** 2))
374
+ assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
375
+ tape.zero()
376
+
377
+ # addition:
378
+ tape = wp.Tape()
379
+ with tape:
380
+ wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
381
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 2, i], outputs=[out], device=device)
382
+
383
+ tape.backward(loss=out)
384
+ expected = np.zeros_like(in1.numpy())
385
+ expected[2, i] = 2.0
386
+ assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
387
+ expected[2, i] = 2.0
388
+ assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
389
+ tape.zero()
390
+
391
+ # subtraction:
392
+ tape = wp.Tape()
393
+ with tape:
394
+ wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
395
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 3, i], outputs=[out], device=device)
396
+
397
+ tape.backward(loss=out)
398
+ expected = np.zeros_like(in1.numpy())
399
+ expected[3, i] = 2.0
400
+ assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
401
+ expected[3, i] = -2.0
402
+ assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
403
+ tape.zero()
404
+
405
+ # modulus: away from discontinuities,
407
+ # d/dx1( x1 % x2 ) == 1,
408
+ # and the expected gradient w.r.t. x2 is zero:
408
+ tape = wp.Tape()
409
+ with tape:
410
+ wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
411
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 4, i], outputs=[out], device=device)
412
+
413
+ tape.backward(loss=out)
414
+ expected = np.zeros_like(in1.numpy())
415
+ expected[4, i] = 2.0
416
+ assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
417
+ expected[4, i] = 0.0
418
+ assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
419
+ tape.zero()
420
+
421
+ # min
422
+ tape = wp.Tape()
423
+ with tape:
424
+ wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
425
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 5, i], outputs=[out], device=device)
426
+
427
+ tape.backward(loss=out)
428
+ expected = np.zeros_like(in1.numpy())
429
+ expected[5, i] = 2.0 if (in1.numpy()[5, i] < in2.numpy()[5, i]) else 0.0
430
+ assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
431
+ expected[5, i] = 2.0 if (in2.numpy()[5, i] < in1.numpy()[5, i]) else 0.0
432
+ assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
433
+ tape.zero()
434
+
435
+ # max
436
+ tape = wp.Tape()
437
+ with tape:
438
+ wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
439
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 6, i], outputs=[out], device=device)
440
+
441
+ tape.backward(loss=out)
442
+ expected = np.zeros_like(in1.numpy())
443
+ expected[6, i] = 2.0 if (in1.numpy()[6, i] > in2.numpy()[6, i]) else 0.0
444
+ assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
445
+ expected[6, i] = 2.0 if (in2.numpy()[6, i] > in1.numpy()[6, i]) else 0.0
446
+ assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
447
+ tape.zero()
448
+
449
+ # floor_divide. The result is piecewise constant (integer-valued), so the gradient is zero
450
+ tape = wp.Tape()
451
+ with tape:
452
+ wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
453
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 7, i], outputs=[out], device=device)
454
+
455
+ tape.backward(loss=out)
456
+ expected = np.zeros_like(in1.numpy())
457
+ assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
458
+ assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
459
+ tape.zero()
460
+
461
+
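The wp.mod() expectation in test_binary_ops above uses truncated division, i.e. C's fmod() convention, rather than the floored convention of Python's % and np.mod(). A quick NumPy/math sketch of the difference and of the reference formula used in the assertion:

    import math

    import numpy as np

    a, b = -7.0, 3.0
    print(math.fmod(a, b))   # -1.0 (truncated: result takes the sign of the dividend)
    print(np.mod(a, b))      #  2.0 (floored: result takes the sign of the divisor)
    print(a - b * np.sign(a) * np.floor(abs(a) / b))  # -1.0, the test's reference value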
462
+ def test_special_funcs(test, device, dtype, register_kernels=False):
463
+ rng = np.random.default_rng(123)
464
+
465
+ tol = {
466
+ np.float16: 1.0e-2,
467
+ np.float32: 1.0e-6,
468
+ np.float64: 1.0e-8,
469
+ }.get(dtype, 0)
470
+
471
+ wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
472
+
473
+ def check_special_funcs(
474
+ inputs: wp.array(dtype=wptype, ndim=2),
475
+ outputs: wp.array(dtype=wptype, ndim=2),
476
+ ):
477
+ # multiply outputs by 2 so we've got something to backpropagate:
478
+ for i in range(10):
479
+ outputs[0, i] = wptype(2) * wp.log(inputs[0, i])
480
+ outputs[1, i] = wptype(2) * wp.log2(inputs[1, i])
481
+ outputs[2, i] = wptype(2) * wp.log10(inputs[2, i])
482
+ outputs[3, i] = wptype(2) * wp.exp(inputs[3, i])
483
+ outputs[4, i] = wptype(2) * wp.atan(inputs[4, i])
484
+ outputs[5, i] = wptype(2) * wp.sin(inputs[5, i])
485
+ outputs[6, i] = wptype(2) * wp.cos(inputs[6, i])
486
+ outputs[7, i] = wptype(2) * wp.sqrt(inputs[7, i])
487
+ outputs[8, i] = wptype(2) * wp.tan(inputs[8, i])
488
+ outputs[9, i] = wptype(2) * wp.sinh(inputs[9, i])
489
+ outputs[10, i] = wptype(2) * wp.cosh(inputs[10, i])
490
+ outputs[11, i] = wptype(2) * wp.tanh(inputs[11, i])
491
+ outputs[12, i] = wptype(2) * wp.acos(inputs[12, i])
492
+ outputs[13, i] = wptype(2) * wp.asin(inputs[13, i])
493
+ outputs[14, i] = wptype(2) * wp.cbrt(inputs[14, i])
494
+
495
+ kernel = getkernel(check_special_funcs, suffix=dtype.__name__)
496
+ output_select_kernel = get_select_kernel2(wptype)
497
+
498
+ if register_kernels:
499
+ return
500
+
501
+ invals = rng.normal(size=(15, 10)).astype(dtype)
502
+ invals[[0, 1, 2, 7, 14]] = 0.1 + np.abs(invals[[0, 1, 2, 7, 14]])
503
+ invals[12] = np.clip(invals[12], -0.9, 0.9)
504
+ invals[13] = np.clip(invals[13], -0.9, 0.9)
505
+ inputs = wp.array(invals, dtype=wptype, requires_grad=True, device=device)
506
+ outputs = wp.zeros_like(inputs)
507
+
508
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
509
+
510
+ assert_np_equal(outputs.numpy()[0], 2 * np.log(inputs.numpy()[0]), tol=tol)
511
+ assert_np_equal(outputs.numpy()[1], 2 * np.log2(inputs.numpy()[1]), tol=tol)
512
+ assert_np_equal(outputs.numpy()[2], 2 * np.log10(inputs.numpy()[2]), tol=tol)
513
+ assert_np_equal(outputs.numpy()[3], 2 * np.exp(inputs.numpy()[3]), tol=tol)
514
+ assert_np_equal(outputs.numpy()[4], 2 * np.arctan(inputs.numpy()[4]), tol=tol)
515
+ assert_np_equal(outputs.numpy()[5], 2 * np.sin(inputs.numpy()[5]), tol=tol)
516
+ assert_np_equal(outputs.numpy()[6], 2 * np.cos(inputs.numpy()[6]), tol=tol)
517
+ assert_np_equal(outputs.numpy()[7], 2 * np.sqrt(inputs.numpy()[7]), tol=tol)
518
+ assert_np_equal(outputs.numpy()[8], 2 * np.tan(inputs.numpy()[8]), tol=tol)
519
+ assert_np_equal(outputs.numpy()[9], 2 * np.sinh(inputs.numpy()[9]), tol=tol)
520
+ assert_np_equal(outputs.numpy()[10], 2 * np.cosh(inputs.numpy()[10]), tol=tol)
521
+ assert_np_equal(outputs.numpy()[11], 2 * np.tanh(inputs.numpy()[11]), tol=tol)
522
+ assert_np_equal(outputs.numpy()[12], 2 * np.arccos(inputs.numpy()[12]), tol=tol)
523
+ assert_np_equal(outputs.numpy()[13], 2 * np.arcsin(inputs.numpy()[13]), tol=tol)
524
+ assert_np_equal(outputs.numpy()[14], 2 * np.cbrt(inputs.numpy()[14]), tol=tol)
525
+
526
+ out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
527
+ if dtype in np_float_types:
528
+ for i in range(10):
529
+ # log:
530
+ tape = wp.Tape()
531
+ with tape:
532
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
533
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
534
+
535
+ tape.backward(loss=out)
536
+ expected = np.zeros_like(inputs.numpy())
537
+ expected[0, i] = 2.0 / inputs.numpy()[0, i]
538
+ assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
539
+ tape.zero()
540
+
541
+ # log2:
542
+ tape = wp.Tape()
543
+ with tape:
544
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
545
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
546
+
547
+ tape.backward(loss=out)
548
+ expected = np.zeros_like(inputs.numpy())
549
+ expected[1, i] = 2.0 / (inputs.numpy()[1, i] * np.log(2.0))
550
+ assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
551
+ tape.zero()
552
+
553
+ # log10:
554
+ tape = wp.Tape()
555
+ with tape:
556
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
557
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 2, i], outputs=[out], device=device)
558
+
559
+ tape.backward(loss=out)
560
+ expected = np.zeros_like(inputs.numpy())
561
+ expected[2, i] = 2.0 / (inputs.numpy()[2, i] * np.log(10.0))
562
+ assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
563
+ tape.zero()
564
+
565
+ # exp:
566
+ tape = wp.Tape()
567
+ with tape:
568
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
569
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 3, i], outputs=[out], device=device)
570
+
571
+ tape.backward(loss=out)
572
+ expected = np.zeros_like(inputs.numpy())
573
+ expected[3, i] = outputs.numpy()[3, i]
574
+ assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
575
+ tape.zero()
576
+
577
+ # arctan:
578
+ # the autodiff formula in Warp was previously wrong: it used (1 + x^2) rather than
580
+ # 1/(1 + x^2) -- checked against a finite difference in the sketch after this function
580
+ tape = wp.Tape()
581
+ with tape:
582
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
583
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 4, i], outputs=[out], device=device)
584
+
585
+ tape.backward(loss=out)
586
+ expected = np.zeros_like(inputs.numpy())
587
+ expected[4, i] = 2.0 / (inputs.numpy()[4, i] ** 2 + 1)
588
+ assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
589
+ tape.zero()
590
+
591
+ # sin:
592
+ tape = wp.Tape()
593
+ with tape:
594
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
595
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 5, i], outputs=[out], device=device)
596
+
597
+ tape.backward(loss=out)
598
+ expected = np.zeros_like(inputs.numpy())
599
+ expected[5, i] = np.cos(inputs.numpy()[5, i]) * 2
600
+ assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
601
+ tape.zero()
602
+
603
+ # cos:
604
+ tape = wp.Tape()
605
+ with tape:
606
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
607
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 6, i], outputs=[out], device=device)
608
+
609
+ tape.backward(loss=out)
610
+ expected = np.zeros_like(inputs.numpy())
611
+ expected[6, i] = -np.sin(inputs.numpy()[6, i]) * 2.0
612
+ assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
613
+ tape.zero()
614
+
615
+ # sqrt:
616
+ tape = wp.Tape()
617
+ with tape:
618
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
619
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 7, i], outputs=[out], device=device)
620
+
621
+ tape.backward(loss=out)
622
+ expected = np.zeros_like(inputs.numpy())
623
+ expected[7, i] = 1.0 / (np.sqrt(inputs.numpy()[7, i]))
624
+ assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
625
+ tape.zero()
626
+
627
+ # tan:
628
+ # there was previously a bug in the autodiff formula here too: the gradient was zeroed
629
+ # unless cos(x) > 0 (the guard should have been cos(x) != 0)
630
+ tape = wp.Tape()
631
+ with tape:
632
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
633
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 8, i], outputs=[out], device=device)
634
+
635
+ tape.backward(loss=out)
636
+ expected = np.zeros_like(inputs.numpy())
637
+ expected[8, i] = 2.0 / (np.cos(inputs.numpy()[8, i]) ** 2)
638
+ assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=200 * tol)
639
+ tape.zero()
640
+
641
+ # sinh:
642
+ tape = wp.Tape()
643
+ with tape:
644
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
645
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 9, i], outputs=[out], device=device)
646
+
647
+ tape.backward(loss=out)
648
+ expected = np.zeros_like(inputs.numpy())
649
+ expected[9, i] = 2.0 * np.cosh(inputs.numpy()[9, i])
650
+ assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
651
+ tape.zero()
652
+
653
+ # cosh:
654
+ tape = wp.Tape()
655
+ with tape:
656
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
657
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 10, i], outputs=[out], device=device)
658
+
659
+ tape.backward(loss=out)
660
+ expected = np.zeros_like(inputs.numpy())
661
+ expected[10, i] = 2.0 * np.sinh(inputs.numpy()[10, i])
662
+ assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
663
+ tape.zero()
664
+
665
+ # tanh:
666
+ tape = wp.Tape()
667
+ with tape:
668
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
669
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 11, i], outputs=[out], device=device)
670
+
671
+ tape.backward(loss=out)
672
+ expected = np.zeros_like(inputs.numpy())
673
+ expected[11, i] = 2.0 / (np.cosh(inputs.numpy()[11, i]) ** 2)
674
+ assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
675
+ tape.zero()
676
+
677
+ # arccos:
678
+ tape = wp.Tape()
679
+ with tape:
680
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
681
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 12, i], outputs=[out], device=device)
682
+
683
+ tape.backward(loss=out)
684
+ expected = np.zeros_like(inputs.numpy())
685
+ expected[12, i] = -2.0 / np.sqrt(1 - inputs.numpy()[12, i] ** 2)
686
+ assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
687
+ tape.zero()
688
+
689
+ # arcsin:
690
+ tape = wp.Tape()
691
+ with tape:
692
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
693
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 13, i], outputs=[out], device=device)
694
+
695
+ tape.backward(loss=out)
696
+ expected = np.zeros_like(inputs.numpy())
697
+ expected[13, i] = 2.0 / np.sqrt(1 - inputs.numpy()[13, i] ** 2)
698
+ assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=6 * tol)
699
+ tape.zero()
700
+
701
+ # cbrt:
702
+ tape = wp.Tape()
703
+ with tape:
704
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
705
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 14, i], outputs=[out], device=device)
706
+
707
+ tape.backward(loss=out)
708
+ expected = np.zeros_like(inputs.numpy())
709
+ cbrt = np.cbrt(inputs.numpy()[14, i], dtype=np.dtype(dtype))
710
+ expected[14, i] = (2.0 / 3.0) * (1.0 / (cbrt * cbrt))
711
+ assert_np_equal(tape.gradients[inputs].numpy(), expected, tol=tol)
712
+ tape.zero()
713
+
714
+
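The arctan note in test_special_funcs (the derivative is 1/(1 + x^2), not (1 + x^2)) is easy to confirm with a central finite difference; a minimal NumPy sketch:

    import numpy as np

    x, h = 0.7, 1.0e-6
    fd = (np.arctan(x + h) - np.arctan(x - h)) / (2.0 * h)
    print(fd, 1.0 / (1.0 + x * x))  # both ~0.67114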
715
+ def test_special_funcs_2arg(test, device, dtype, register_kernels=False):
716
+ rng = np.random.default_rng(123)
717
+
718
+ tol = {
719
+ np.float16: 1.0e-2,
720
+ np.float32: 1.0e-6,
721
+ np.float64: 1.0e-8,
722
+ }.get(dtype, 0)
723
+
724
+ wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
725
+
726
+ def check_special_funcs_2arg(
727
+ in1: wp.array(dtype=wptype, ndim=2),
728
+ in2: wp.array(dtype=wptype, ndim=2),
729
+ outputs: wp.array(dtype=wptype, ndim=2),
730
+ ):
731
+ # multiply outputs by 2 so we've got something to backpropagate:
732
+ for i in range(10):
733
+ outputs[0, i] = wptype(2) * wp.pow(in1[0, i], in2[0, i])
734
+ outputs[1, i] = wptype(2) * wp.atan2(in1[1, i], in2[1, i])
735
+
736
+ kernel = getkernel(check_special_funcs_2arg, suffix=dtype.__name__)
737
+ output_select_kernel = get_select_kernel2(wptype)
738
+
739
+ if register_kernels:
740
+ return
741
+
742
+ in1 = wp.array(np.abs(randvals(rng, [2, 10], dtype)), dtype=wptype, requires_grad=True, device=device)
743
+ in2 = wp.array(randvals(rng, [2, 10], dtype), dtype=wptype, requires_grad=True, device=device)
744
+ outputs = wp.zeros_like(in1)
745
+
746
+ wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
747
+
748
+ assert_np_equal(outputs.numpy()[0], 2.0 * np.power(in1.numpy()[0], in2.numpy()[0]), tol=tol)
749
+ assert_np_equal(outputs.numpy()[1], 2.0 * np.arctan2(in1.numpy()[1], in2.numpy()[1]), tol=tol)
750
+
751
+ out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
752
+ if dtype in np_float_types:
753
+ for i in range(10):
754
+ # pow:
755
+ tape = wp.Tape()
756
+ with tape:
757
+ wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
758
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
759
+ tape.backward(loss=out)
760
+ expected = np.zeros_like(in1.numpy())
761
+ expected[0, i] = 2.0 * in2.numpy()[0, i] * np.power(in1.numpy()[0, i], in2.numpy()[0, i] - 1)
762
+ assert_np_equal(tape.gradients[in1].numpy(), expected, tol=5 * tol)
763
+ expected[0, i] = 2.0 * np.power(in1.numpy()[0, i], in2.numpy()[0, i]) * np.log(in1.numpy()[0, i])
764
+ assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
765
+ tape.zero()
766
+
767
+ # atan2:
768
+ tape = wp.Tape()
769
+ with tape:
770
+ wp.launch(kernel, dim=1, inputs=[in1, in2], outputs=[outputs], device=device)
771
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
772
+
773
+ tape.backward(loss=out)
774
+ expected = np.zeros_like(in1.numpy())
775
+ expected[1, i] = 2.0 * in2.numpy()[1, i] / (in1.numpy()[1, i] ** 2 + in2.numpy()[1, i] ** 2)
776
+ assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
777
+ expected[1, i] = -2.0 * in1.numpy()[1, i] / (in1.numpy()[1, i] ** 2 + in2.numpy()[1, i] ** 2)
778
+ assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
779
+ tape.zero()
780
+
781
+
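The atan2 gradients asserted in test_special_funcs_2arg are d/dy atan2(y, x) = x/(x^2 + y^2) and d/dx atan2(y, x) = -y/(x^2 + y^2); a finite-difference sketch:

    import numpy as np

    y, x, h = 1.3, 0.4, 1.0e-6
    r2 = x * x + y * y
    d_dy = (np.arctan2(y + h, x) - np.arctan2(y - h, x)) / (2.0 * h)
    d_dx = (np.arctan2(y, x + h) - np.arctan2(y, x - h)) / (2.0 * h)
    print(d_dy, x / r2)   # ~0.2162 each
    print(d_dx, -y / r2)  # ~-0.7027 each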
782
+ def test_float_to_int(test, device, dtype, register_kernels=False):
783
+ rng = np.random.default_rng(123)
784
+
785
+ tol = {
786
+ np.float16: 5.0e-3,
787
+ np.float32: 1.0e-6,
788
+ np.float64: 1.0e-8,
789
+ }.get(dtype, 0)
790
+
791
+ wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
792
+
793
+ def check_float_to_int(
794
+ inputs: wp.array(dtype=wptype, ndim=2),
795
+ outputs: wp.array(dtype=wptype, ndim=2),
796
+ ):
797
+ for i in range(10):
798
+ outputs[0, i] = wp.round(inputs[0, i])
799
+ outputs[1, i] = wp.rint(inputs[1, i])
800
+ outputs[2, i] = wp.trunc(inputs[2, i])
801
+ outputs[3, i] = wp.floor(inputs[3, i])
802
+ outputs[4, i] = wp.ceil(inputs[4, i])
803
+ outputs[5, i] = wp.frac(inputs[5, i])
804
+
805
+ kernel = getkernel(check_float_to_int, suffix=dtype.__name__)
806
+ output_select_kernel = get_select_kernel2(wptype)
807
+
808
+ if register_kernels:
809
+ return
810
+
811
+ inputs = wp.array(rng.standard_normal(size=(6, 10)).astype(dtype), dtype=wptype, requires_grad=True, device=device)
812
+ outputs = wp.zeros_like(inputs)
813
+
814
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
815
+
816
+ assert_np_equal(outputs.numpy()[0], np.round(inputs.numpy()[0]))
817
+ assert_np_equal(outputs.numpy()[1], np.rint(inputs.numpy()[1]))
818
+ assert_np_equal(outputs.numpy()[2], np.trunc(inputs.numpy()[2]))
819
+ assert_np_equal(outputs.numpy()[3], np.floor(inputs.numpy()[3]))
820
+ assert_np_equal(outputs.numpy()[4], np.ceil(inputs.numpy()[4]))
821
+ assert_np_equal(outputs.numpy()[5], np.modf(inputs.numpy()[5])[0])
822
+
823
+ # the gradients of round/rint/trunc/floor/ceil should all be zero, as those functions are piecewise constant (frac is skipped below, since d(frac)/dx == 1 almost everywhere):
824
+
825
+ out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
826
+ for i in range(10):
827
+ for j in range(5):
828
+ tape = wp.Tape()
829
+ with tape:
830
+ wp.launch(kernel, dim=1, inputs=[inputs], outputs=[outputs], device=device)
831
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, j, i], outputs=[out], device=device)
832
+
833
+ tape.backward(loss=out)
834
+ assert_np_equal(tape.gradients[inputs].numpy(), np.zeros_like(inputs.numpy()), tol=tol)
835
+ tape.zero()
836
+
837
+
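test_float_to_int compares wp.frac() against np.modf(x)[0], the signed fractional part x - trunc(x); a quick sketch of that identity:

    import numpy as np

    x = np.array([-1.75, 0.25, 2.5])
    print(np.modf(x)[0])    # [-0.75  0.25  0.5 ]
    print(x - np.trunc(x))  # identical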
838
+ def test_interp(test, device, dtype, register_kernels=False):
839
+ rng = np.random.default_rng(123)
840
+
841
+ tol = {
842
+ np.float16: 1.0e-2,
843
+ np.float32: 5.0e-6,
844
+ np.float64: 1.0e-8,
845
+ }.get(dtype, 0)
846
+
847
+ wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
848
+
849
+ def check_interp(
850
+ in1: wp.array(dtype=wptype, ndim=2),
851
+ in2: wp.array(dtype=wptype, ndim=2),
852
+ in3: wp.array(dtype=wptype, ndim=2),
853
+ outputs: wp.array(dtype=wptype, ndim=2),
854
+ ):
855
+ # multiply outputs by 2 so we've got something to backpropagate:
856
+ for i in range(10):
857
+ outputs[0, i] = wptype(2) * wp.smoothstep(in1[0, i], in2[0, i], in3[0, i])
858
+ outputs[1, i] = wptype(2) * wp.lerp(in1[1, i], in2[1, i], in3[1, i])
859
+
860
+ kernel = getkernel(check_interp, suffix=dtype.__name__)
861
+ output_select_kernel = get_select_kernel2(wptype)
862
+
863
+ if register_kernels:
864
+ return
865
+
866
+ e0 = randvals(rng, [2, 10], dtype)
867
+ e1 = e0 + randvals(rng, [2, 10], dtype) + 0.1
868
+ in1 = wp.array(e0, dtype=wptype, requires_grad=True, device=device)
869
+ in2 = wp.array(e1, dtype=wptype, requires_grad=True, device=device)
870
+ in3 = wp.array(randvals(rng, [2, 10], dtype), dtype=wptype, requires_grad=True, device=device)
871
+
872
+ outputs = wp.zeros_like(in1)
873
+
874
+ wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
875
+
876
+ edge0 = in1.numpy()[0]
877
+ edge1 = in2.numpy()[0]
878
+ t_smoothstep = in3.numpy()[0]
879
+ x = np.clip((t_smoothstep - edge0) / (edge1 - edge0), 0, 1)
880
+ smoothstep_expected = 2.0 * x * x * (3 - 2 * x)
881
+
882
+ assert_np_equal(outputs.numpy()[0], smoothstep_expected, tol=tol)
883
+
884
+ a = in1.numpy()[1]
885
+ b = in2.numpy()[1]
886
+ t = in3.numpy()[1]
887
+ assert_np_equal(outputs.numpy()[1], 2.0 * (a * (1 - t) + b * t), tol=tol)
888
+
889
+ out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
890
+ if dtype in np_float_types:
891
+ for i in range(10):
892
+ tape = wp.Tape()
893
+ with tape:
894
+ wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
895
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 0, i], outputs=[out], device=device)
896
+ tape.backward(loss=out)
897
+
898
+ # e0 = in1
899
+ # e1 = in2
900
+ # t = in3
901
+
902
+ # x = clamp((t - e0) / (e1 - e0), 0,1)
903
+ # dx/dt = 1 / (e1 - e0) if e0 < t < e1 else 0
904
+
905
+ # y = x * x * (3 - 2 * x)
906
+
907
+ # y = 3 * x * x - 2 * x * x * x
908
+ # dy/dx = 6 * ( x - x^2 )
909
+ dydx = 6 * x * (1 - x)
910
+
911
+ # dy/in1 = dy/dx dx/de0 de0/din1
912
+ dxde0 = (t_smoothstep - edge1) / ((edge1 - edge0) ** 2)
913
+ dxde0[x == 0] = 0
914
+ dxde0[x == 1] = 0
915
+
916
+ expected_grads = np.zeros_like(in1.numpy())
917
+ expected_grads[0, i] = 2.0 * dydx[i] * dxde0[i]
918
+ assert_np_equal(tape.gradients[in1].numpy(), expected_grads, tol=tol)
919
+
920
+ # dy/in2 = dy/dx dx/de1 de1/din2
921
+ dxde1 = (edge0 - t_smoothstep) / ((edge1 - edge0) ** 2)
922
+ dxde1[x == 0] = 0
923
+ dxde1[x == 1] = 0
924
+
925
+ expected_grads = np.zeros_like(in1.numpy())
926
+ expected_grads[0, i] = 2.0 * dydx[i] * dxde1[i]
927
+ assert_np_equal(tape.gradients[in2].numpy(), expected_grads, tol=tol)
928
+
929
+ # dy/in3 = dy/dx dx/dt dt/din3
930
+ dxdt = 1.0 / (edge1 - edge0)
931
+ dxdt[x == 0] = 0
932
+ dxdt[x == 1] = 0
933
+
934
+ expected_grads = np.zeros_like(in1.numpy())
935
+ expected_grads[0, i] = 2.0 * dydx[i] * dxdt[i]
936
+ assert_np_equal(tape.gradients[in3].numpy(), expected_grads, tol=tol)
937
+ tape.zero()
938
+
939
+ tape = wp.Tape()
940
+ with tape:
941
+ wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
942
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, 1, i], outputs=[out], device=device)
943
+ tape.backward(loss=out)
944
+
945
+ # y = a*(1-t) + b*t
946
+ # a = in1
947
+ # b = in2
948
+ # t = in3
949
+
950
+ # y = in1*( 1 - in3 ) + in2*in3
951
+
952
+ # dy/din1 = (1-in3)
953
+ expected_grads = np.zeros_like(in1.numpy())
954
+ expected_grads[1, i] = 2.0 * (1 - in3.numpy()[1, i])
955
+ assert_np_equal(tape.gradients[in1].numpy(), expected_grads, tol=tol)
956
+
957
+ # dy/din2 = in3
958
+ expected_grads = np.zeros_like(in1.numpy())
959
+ expected_grads[1, i] = 2.0 * in3.numpy()[1, i]
960
+ assert_np_equal(tape.gradients[in2].numpy(), expected_grads, tol=tol)
961
+
962
+ # dy/din3 = in2 - in1
963
+ expected_grads = np.zeros_like(in1.numpy())
964
+ expected_grads[1, i] = 2.0 * (in2.numpy()[1, i] - in1.numpy()[1, i])
965
+ assert_np_equal(tape.gradients[in3].numpy(), expected_grads, tol=tol)
966
+ tape.zero()
967
+
968
+
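The chain-rule expectations in test_interp collapse to d/dt smoothstep(e0, e1, t) = 6x(1 - x) / (e1 - e0) with x = clip((t - e0)/(e1 - e0), 0, 1). A NumPy sketch checking that against a finite difference away from the clamp boundaries:

    import numpy as np

    def smoothstep(e0, e1, t):
        x = np.clip((t - e0) / (e1 - e0), 0.0, 1.0)
        return x * x * (3.0 - 2.0 * x)

    e0, e1, t, h = 0.0, 2.0, 0.5, 1.0e-6
    fd = (smoothstep(e0, e1, t + h) - smoothstep(e0, e1, t - h)) / (2.0 * h)
    x = (t - e0) / (e1 - e0)
    print(fd, 6.0 * x * (1.0 - x) / (e1 - e0))  # both ~0.5625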
969
+ def test_clamp(test, device, dtype, register_kernels=False):
970
+ rng = np.random.default_rng(123)
971
+
972
+ tol = {
973
+ np.float16: 5.0e-3,
974
+ np.float32: 1.0e-6,
975
+ np.float64: 1.0e-6,
976
+ }.get(dtype, 0)
977
+
978
+ wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
979
+
980
+ def check_clamp(
981
+ in1: wp.array(dtype=wptype),
982
+ in2: wp.array(dtype=wptype),
983
+ in3: wp.array(dtype=wptype),
984
+ outputs: wp.array(dtype=wptype),
985
+ ):
986
+ for i in range(100):
987
+ # multiply output by 2 so we've got something to backpropagate:
988
+ outputs[i] = wptype(2) * wp.clamp(in1[i], in2[i], in3[i])
989
+
990
+ kernel = getkernel(check_clamp, suffix=dtype.__name__)
991
+ output_select_kernel = get_select_kernel(wptype)
992
+
993
+ if register_kernels:
994
+ return
995
+
996
+ in1 = wp.array(randvals(rng, [100], dtype), dtype=wptype, requires_grad=True, device=device)
997
+ starts = randvals(rng, [100], dtype)
998
+ diffs = np.abs(randvals(rng, [100], dtype))
999
+ in2 = wp.array(starts, dtype=wptype, requires_grad=True, device=device)
1000
+ in3 = wp.array(starts + diffs, dtype=wptype, requires_grad=True, device=device)
1001
+ outputs = wp.zeros_like(in1)
1002
+
1003
+ wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
1004
+
1005
+ assert_np_equal(2 * np.clip(in1.numpy(), in2.numpy(), in3.numpy()), outputs.numpy(), tol=tol)
1006
+
1007
+ out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1008
+ if dtype in np_float_types:
1009
+ for i in range(100):
1010
+ tape = wp.Tape()
1011
+ with tape:
1012
+ wp.launch(kernel, dim=1, inputs=[in1, in2, in3], outputs=[outputs], device=device)
1013
+ wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[out], device=device)
1014
+
1015
+ tape.backward(loss=out)
1016
+ t = in1.numpy()[i]
1017
+ lower = in2.numpy()[i]
1018
+ upper = in3.numpy()[i]
1019
+ expected = np.zeros_like(in1.numpy())
1020
+ if t < lower:
1021
+ expected[i] = 2.0
1022
+ assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
1023
+ expected[i] = 0.0
1024
+ assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
1025
+ assert_np_equal(tape.gradients[in3].numpy(), expected, tol=tol)
1026
+ elif t > upper:
1027
+ expected[i] = 2.0
1028
+ assert_np_equal(tape.gradients[in3].numpy(), expected, tol=tol)
1029
+ expected[i] = 0.0
1030
+ assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
1031
+ assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
1032
+ else:
1033
+ expected[i] = 2.0
1034
+ assert_np_equal(tape.gradients[in1].numpy(), expected, tol=tol)
1035
+ expected[i] = 0.0
1036
+ assert_np_equal(tape.gradients[in2].numpy(), expected, tol=tol)
1037
+ assert_np_equal(tape.gradients[in3].numpy(), expected, tol=tol)
1038
+
1039
+ tape.zero()
1040
+
1041
+
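The three branches in test_clamp route the incoming gradient to exactly one of the inputs, depending on where the value falls relative to [lower, upper]; restated compactly (an illustrative helper, not part of the test suite):

    def clamp_grads(t, lo, hi):
        # returns (d/dt, d/dlo, d/dhi) of clamp(t, lo, hi)
        if t < lo:
            return 0.0, 1.0, 0.0
        if t > hi:
            return 0.0, 0.0, 1.0
        return 1.0, 0.0, 0.0

    print(clamp_grads(-1.0, 0.0, 2.0))  # (0.0, 1.0, 0.0) -- clamped below: gradient to lower
    print(clamp_grads(1.0, 0.0, 2.0))   # (1.0, 0.0, 0.0) -- inside: gradient to the value
    print(clamp_grads(3.0, 0.0, 2.0))   # (0.0, 0.0, 1.0) -- clamped above: gradient to upper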
1042
+ devices = get_test_devices()
1043
+
1044
+
1045
+ class TestArithmetic(unittest.TestCase):
1046
+ pass
1047
+
1048
+
1049
+ # these unary ops only make sense for signed values:
1050
+ for dtype in np_signed_int_types + np_float_types:
1051
+ add_function_test_register_kernel(
1052
+ TestArithmetic, f"test_unary_ops_{dtype.__name__}", test_unary_ops, devices=devices, dtype=dtype
1053
+ )
1054
+
1055
+ for dtype in np_float_types:
1056
+ add_function_test_register_kernel(
1057
+ TestArithmetic, f"test_special_funcs_{dtype.__name__}", test_special_funcs, devices=devices, dtype=dtype
1058
+ )
1059
+ add_function_test_register_kernel(
1060
+ TestArithmetic,
1061
+ f"test_special_funcs_2arg_{dtype.__name__}",
1062
+ test_special_funcs_2arg,
1063
+ devices=devices,
1064
+ dtype=dtype,
1065
+ )
1066
+ add_function_test_register_kernel(
1067
+ TestArithmetic, f"test_interp_{dtype.__name__}", test_interp, devices=devices, dtype=dtype
1068
+ )
1069
+ add_function_test_register_kernel(
1070
+ TestArithmetic, f"test_float_to_int_{dtype.__name__}", test_float_to_int, devices=devices, dtype=dtype
1071
+ )
1072
+
1073
+ for dtype in np_scalar_types:
1074
+ add_function_test_register_kernel(
1075
+ TestArithmetic, f"test_clamp_{dtype.__name__}", test_clamp, devices=devices, dtype=dtype
1076
+ )
1077
+ add_function_test_register_kernel(
1078
+ TestArithmetic, f"test_nonzero_{dtype.__name__}", test_nonzero, devices=devices, dtype=dtype
1079
+ )
1080
+ add_function_test(TestArithmetic, f"test_arrays_{dtype.__name__}", test_arrays, devices=devices, dtype=dtype)
1081
+ add_function_test_register_kernel(
1082
+ TestArithmetic, f"test_binary_ops_{dtype.__name__}", test_binary_ops, devices=devices, dtype=dtype
1083
+ )
1084
+
1085
+
1086
+ if __name__ == "__main__":
1087
+ wp.build.clear_kernel_cache()
1088
+ unittest.main(verbosity=2, failfast=False)