warp-lang 1.0.0b5__py3-none-manylinux2014_x86_64.whl → 1.0.0b6__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. docs/conf.py +3 -4
  2. examples/env/env_ant.py +1 -1
  3. examples/env/env_cartpole.py +1 -1
  4. examples/env/env_humanoid.py +1 -1
  5. examples/example_dem.py +28 -26
  6. examples/example_diffray.py +37 -30
  7. examples/example_fluid.py +7 -3
  8. examples/example_jacobian_ik.py +1 -1
  9. examples/example_mesh_intersect.py +10 -7
  10. examples/example_nvdb.py +3 -3
  11. examples/example_render_opengl.py +19 -10
  12. examples/example_sim_cartpole.py +9 -5
  13. examples/example_sim_cloth.py +29 -25
  14. examples/example_sim_fk_grad.py +2 -2
  15. examples/example_sim_fk_grad_torch.py +3 -3
  16. examples/example_sim_grad_bounce.py +11 -8
  17. examples/example_sim_grad_cloth.py +12 -9
  18. examples/example_sim_granular.py +2 -2
  19. examples/example_sim_granular_collision_sdf.py +13 -13
  20. examples/example_sim_neo_hookean.py +3 -3
  21. examples/example_sim_particle_chain.py +2 -2
  22. examples/example_sim_quadruped.py +8 -5
  23. examples/example_sim_rigid_chain.py +8 -5
  24. examples/example_sim_rigid_contact.py +13 -10
  25. examples/example_sim_rigid_fem.py +2 -2
  26. examples/example_sim_rigid_gyroscopic.py +2 -2
  27. examples/example_sim_rigid_kinematics.py +1 -1
  28. examples/example_sim_trajopt.py +3 -2
  29. examples/fem/example_apic_fluid.py +5 -7
  30. examples/fem/example_diffusion_mgpu.py +18 -16
  31. warp/__init__.py +3 -2
  32. warp/bin/warp.so +0 -0
  33. warp/build_dll.py +29 -9
  34. warp/builtins.py +206 -7
  35. warp/codegen.py +58 -38
  36. warp/config.py +3 -1
  37. warp/context.py +234 -128
  38. warp/fem/__init__.py +2 -2
  39. warp/fem/cache.py +2 -1
  40. warp/fem/field/nodal_field.py +18 -17
  41. warp/fem/geometry/hexmesh.py +11 -6
  42. warp/fem/geometry/quadmesh_2d.py +16 -12
  43. warp/fem/geometry/tetmesh.py +19 -8
  44. warp/fem/geometry/trimesh_2d.py +18 -7
  45. warp/fem/integrate.py +341 -196
  46. warp/fem/quadrature/__init__.py +1 -1
  47. warp/fem/quadrature/pic_quadrature.py +138 -53
  48. warp/fem/quadrature/quadrature.py +81 -9
  49. warp/fem/space/__init__.py +1 -1
  50. warp/fem/space/basis_space.py +169 -51
  51. warp/fem/space/grid_2d_function_space.py +2 -2
  52. warp/fem/space/grid_3d_function_space.py +2 -2
  53. warp/fem/space/hexmesh_function_space.py +2 -2
  54. warp/fem/space/partition.py +9 -6
  55. warp/fem/space/quadmesh_2d_function_space.py +2 -2
  56. warp/fem/space/shape/cube_shape_function.py +27 -15
  57. warp/fem/space/shape/square_shape_function.py +29 -18
  58. warp/fem/space/tetmesh_function_space.py +2 -2
  59. warp/fem/space/topology.py +10 -0
  60. warp/fem/space/trimesh_2d_function_space.py +2 -2
  61. warp/fem/utils.py +10 -5
  62. warp/native/array.h +49 -8
  63. warp/native/builtin.h +31 -14
  64. warp/native/cuda_util.cpp +8 -3
  65. warp/native/cuda_util.h +1 -0
  66. warp/native/exports.h +1177 -1108
  67. warp/native/intersect.h +4 -4
  68. warp/native/intersect_adj.h +8 -8
  69. warp/native/mat.h +65 -6
  70. warp/native/mesh.h +126 -5
  71. warp/native/quat.h +28 -4
  72. warp/native/vec.h +76 -14
  73. warp/native/warp.cu +1 -6
  74. warp/render/render_opengl.py +261 -109
  75. warp/sim/import_mjcf.py +13 -7
  76. warp/sim/import_urdf.py +14 -14
  77. warp/sim/inertia.py +17 -18
  78. warp/sim/model.py +67 -67
  79. warp/sim/render.py +1 -1
  80. warp/sparse.py +6 -6
  81. warp/stubs.py +19 -81
  82. warp/tape.py +1 -1
  83. warp/tests/__main__.py +3 -6
  84. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  85. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  86. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  87. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  88. warp/tests/aux_test_unresolved_func.py +14 -0
  89. warp/tests/aux_test_unresolved_symbol.py +14 -0
  90. warp/tests/{test_kinematics.py → disabled_kinematics.py} +10 -12
  91. warp/tests/run_coverage_serial.py +31 -0
  92. warp/tests/test_adam.py +102 -106
  93. warp/tests/test_arithmetic.py +39 -40
  94. warp/tests/test_array.py +46 -48
  95. warp/tests/test_array_reduce.py +25 -19
  96. warp/tests/test_atomic.py +62 -26
  97. warp/tests/test_bool.py +16 -11
  98. warp/tests/test_builtins_resolution.py +1292 -0
  99. warp/tests/test_bvh.py +9 -12
  100. warp/tests/test_closest_point_edge_edge.py +53 -57
  101. warp/tests/test_codegen.py +164 -134
  102. warp/tests/test_compile_consts.py +13 -19
  103. warp/tests/test_conditional.py +30 -32
  104. warp/tests/test_copy.py +9 -12
  105. warp/tests/test_ctypes.py +90 -98
  106. warp/tests/test_dense.py +20 -14
  107. warp/tests/test_devices.py +34 -35
  108. warp/tests/test_dlpack.py +74 -75
  109. warp/tests/test_examples.py +215 -97
  110. warp/tests/test_fabricarray.py +15 -21
  111. warp/tests/test_fast_math.py +14 -11
  112. warp/tests/test_fem.py +280 -97
  113. warp/tests/test_fp16.py +19 -15
  114. warp/tests/test_func.py +177 -194
  115. warp/tests/test_generics.py +71 -77
  116. warp/tests/test_grad.py +83 -32
  117. warp/tests/test_grad_customs.py +7 -9
  118. warp/tests/test_hash_grid.py +6 -10
  119. warp/tests/test_import.py +9 -23
  120. warp/tests/test_indexedarray.py +19 -21
  121. warp/tests/test_intersect.py +15 -9
  122. warp/tests/test_large.py +17 -19
  123. warp/tests/test_launch.py +14 -17
  124. warp/tests/test_lerp.py +63 -63
  125. warp/tests/test_lvalue.py +84 -35
  126. warp/tests/test_marching_cubes.py +9 -13
  127. warp/tests/test_mat.py +388 -3004
  128. warp/tests/test_mat_lite.py +9 -12
  129. warp/tests/test_mat_scalar_ops.py +2889 -0
  130. warp/tests/test_math.py +10 -11
  131. warp/tests/test_matmul.py +104 -100
  132. warp/tests/test_matmul_lite.py +72 -98
  133. warp/tests/test_mesh.py +35 -32
  134. warp/tests/test_mesh_query_aabb.py +18 -25
  135. warp/tests/test_mesh_query_point.py +39 -23
  136. warp/tests/test_mesh_query_ray.py +9 -21
  137. warp/tests/test_mlp.py +8 -9
  138. warp/tests/test_model.py +89 -93
  139. warp/tests/test_modules_lite.py +15 -25
  140. warp/tests/test_multigpu.py +87 -114
  141. warp/tests/test_noise.py +10 -12
  142. warp/tests/test_operators.py +14 -21
  143. warp/tests/test_options.py +10 -11
  144. warp/tests/test_pinned.py +16 -18
  145. warp/tests/test_print.py +16 -20
  146. warp/tests/test_quat.py +121 -88
  147. warp/tests/test_rand.py +12 -13
  148. warp/tests/test_reload.py +27 -32
  149. warp/tests/test_rounding.py +7 -10
  150. warp/tests/test_runlength_encode.py +105 -106
  151. warp/tests/test_smoothstep.py +8 -9
  152. warp/tests/test_snippet.py +13 -22
  153. warp/tests/test_sparse.py +30 -29
  154. warp/tests/test_spatial.py +179 -174
  155. warp/tests/test_streams.py +100 -107
  156. warp/tests/test_struct.py +98 -67
  157. warp/tests/test_tape.py +11 -17
  158. warp/tests/test_torch.py +89 -86
  159. warp/tests/test_transient_module.py +9 -12
  160. warp/tests/test_types.py +328 -50
  161. warp/tests/test_utils.py +217 -218
  162. warp/tests/test_vec.py +133 -2133
  163. warp/tests/test_vec_lite.py +8 -11
  164. warp/tests/test_vec_scalar_ops.py +2099 -0
  165. warp/tests/test_volume.py +391 -382
  166. warp/tests/test_volume_write.py +122 -135
  167. warp/tests/unittest_serial.py +35 -0
  168. warp/tests/unittest_suites.py +291 -0
  169. warp/tests/{test_base.py → unittest_utils.py} +138 -25
  170. warp/tests/{test_misc.py → unused_test_misc.py} +13 -5
  171. warp/tests/{test_debug.py → walkthough_debug.py} +2 -15
  172. warp/thirdparty/unittest_parallel.py +257 -54
  173. warp/types.py +119 -98
  174. warp/utils.py +14 -0
  175. {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/METADATA +2 -1
  176. {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/RECORD +182 -178
  177. {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/WHEEL +1 -1
  178. warp/tests/test_all.py +0 -239
  179. warp/tests/test_conditional_unequal_types_kernels.py +0 -14
  180. warp/tests/test_coverage.py +0 -38
  181. warp/tests/test_unresolved_func.py +0 -7
  182. warp/tests/test_unresolved_symbol.py +0 -7
  183. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  184. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  185. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  186. {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/LICENSE.md +0 -0
  187. {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/top_level.txt +0 -0
warp/tests/test_vec.py CHANGED
@@ -8,8 +8,9 @@
8
8
  import unittest
9
9
 
10
10
  import numpy as np
11
+
11
12
  import warp as wp
12
- from warp.tests.test_base import *
13
+ from warp.tests.unittest_utils import *
13
14
 
14
15
  wp.init()
15
16
 
@@ -29,12 +30,8 @@ np_unsigned_int_types = [
29
30
  np.ubyte,
30
31
  ]
31
32
 
32
- np_int_types = np_signed_int_types + np_unsigned_int_types
33
-
34
33
  np_float_types = [np.float16, np.float32, np.float64]
35
34
 
36
- np_scalar_types = np_int_types + np_float_types
37
-
38
35
 
39
36
  def randvals(rng, shape, dtype):
40
37
  if dtype in np_float_types:
@@ -54,521 +51,6 @@ def getkernel(func, suffix=""):
54
51
  return kernel_cache[key]
55
52
 
56
53
 
57
- def get_select_kernel(dtype):
58
- def output_select_kernel_fn(
59
- input: wp.array(dtype=dtype),
60
- index: int,
61
- out: wp.array(dtype=dtype),
62
- ):
63
- out[0] = input[index]
64
-
65
- return getkernel(output_select_kernel_fn, suffix=dtype.__name__)
66
-
67
-
68
- def get_select_kernel2(dtype):
69
- def output_select_kernel2_fn(
70
- input: wp.array(dtype=dtype, ndim=2),
71
- index0: int,
72
- index1: int,
73
- out: wp.array(dtype=dtype),
74
- ):
75
- out[0] = input[index0, index1]
76
-
77
- return getkernel(output_select_kernel2_fn, suffix=dtype.__name__)
78
-
79
-
80
- def test_arrays(test, device, dtype):
81
- rng = np.random.default_rng(123)
82
-
83
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
84
- vec2 = wp.types.vector(length=2, dtype=wptype)
85
- vec3 = wp.types.vector(length=3, dtype=wptype)
86
- vec4 = wp.types.vector(length=4, dtype=wptype)
87
- vec5 = wp.types.vector(length=5, dtype=wptype)
88
-
89
- v2_np = randvals(rng, (10, 2), dtype)
90
- v3_np = randvals(rng, (10, 3), dtype)
91
- v4_np = randvals(rng, (10, 4), dtype)
92
- v5_np = randvals(rng, (10, 5), dtype)
93
-
94
- v2 = wp.array(v2_np, dtype=vec2, requires_grad=True, device=device)
95
- v3 = wp.array(v3_np, dtype=vec3, requires_grad=True, device=device)
96
- v4 = wp.array(v4_np, dtype=vec4, requires_grad=True, device=device)
97
- v5 = wp.array(v5_np, dtype=vec5, requires_grad=True, device=device)
98
-
99
- assert_np_equal(v2.numpy(), v2_np, tol=1.0e-6)
100
- assert_np_equal(v3.numpy(), v3_np, tol=1.0e-6)
101
- assert_np_equal(v4.numpy(), v4_np, tol=1.0e-6)
102
- assert_np_equal(v5.numpy(), v5_np, tol=1.0e-6)
103
-
104
- vec2 = wp.types.vector(length=2, dtype=wptype)
105
- vec3 = wp.types.vector(length=3, dtype=wptype)
106
- vec4 = wp.types.vector(length=4, dtype=wptype)
107
-
108
- v2 = wp.array(v2_np, dtype=vec2, requires_grad=True, device=device)
109
- v3 = wp.array(v3_np, dtype=vec3, requires_grad=True, device=device)
110
- v4 = wp.array(v4_np, dtype=vec4, requires_grad=True, device=device)
111
-
112
- assert_np_equal(v2.numpy(), v2_np, tol=1.0e-6)
113
- assert_np_equal(v3.numpy(), v3_np, tol=1.0e-6)
114
- assert_np_equal(v4.numpy(), v4_np, tol=1.0e-6)
115
-
116
-
117
- def test_components(test, device, dtype):
118
- # test accessing vector components from Python - this is especially important
119
- # for float16, which requires special handling internally
120
-
121
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
122
- vec3 = wp.types.vector(length=3, dtype=wptype)
123
-
124
- v = vec3(1, 2, 3)
125
-
126
- # test __getitem__ for individual components
127
- test.assertEqual(v[0], 1)
128
- test.assertEqual(v[1], 2)
129
- test.assertEqual(v[2], 3)
130
-
131
- # test __getitem__ for slices
132
- s = v[:]
133
- test.assertEqual(s[0], 1)
134
- test.assertEqual(s[1], 2)
135
- test.assertEqual(s[2], 3)
136
-
137
- s = v[1:]
138
- test.assertEqual(s[0], 2)
139
- test.assertEqual(s[1], 3)
140
-
141
- s = v[:2]
142
- test.assertEqual(s[0], 1)
143
- test.assertEqual(s[1], 2)
144
-
145
- s = v[::2]
146
- test.assertEqual(s[0], 1)
147
- test.assertEqual(s[1], 3)
148
-
149
- # test __setitem__ for individual components
150
- v[0] = 4
151
- v[1] = 5
152
- v[2] = 6
153
- test.assertEqual(v[0], 4)
154
- test.assertEqual(v[1], 5)
155
- test.assertEqual(v[2], 6)
156
-
157
- # test __setitem__ for slices
158
- v[:] = [7, 8, 9]
159
- test.assertEqual(v[0], 7)
160
- test.assertEqual(v[1], 8)
161
- test.assertEqual(v[2], 9)
162
-
163
- v[1:] = [10, 11]
164
- test.assertEqual(v[0], 7)
165
- test.assertEqual(v[1], 10)
166
- test.assertEqual(v[2], 11)
167
-
168
- v[:2] = [12, 13]
169
- test.assertEqual(v[0], 12)
170
- test.assertEqual(v[1], 13)
171
- test.assertEqual(v[2], 11)
172
-
173
- v[::2] = [14, 15]
174
- test.assertEqual(v[0], 14)
175
- test.assertEqual(v[1], 13)
176
- test.assertEqual(v[2], 15)
177
-
178
-
179
- def test_anon_type_instance(test, device, dtype, register_kernels=False):
180
- rng = np.random.default_rng(123)
181
-
182
- tol = {
183
- np.float16: 5.0e-3,
184
- np.float32: 1.0e-6,
185
- np.float64: 1.0e-8,
186
- }.get(dtype, 0)
187
-
188
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
189
-
190
- def check_scalar_init(
191
- input: wp.array(dtype=wptype),
192
- output: wp.array(dtype=wptype),
193
- ):
194
- v2result = wp.vector(input[0], length=2)
195
- v3result = wp.vector(input[1], length=3)
196
- v4result = wp.vector(input[2], length=4)
197
- v5result = wp.vector(input[3], length=5)
198
-
199
- idx = 0
200
- for i in range(2):
201
- output[idx] = wptype(2) * v2result[i]
202
- idx = idx + 1
203
- for i in range(3):
204
- output[idx] = wptype(2) * v3result[i]
205
- idx = idx + 1
206
- for i in range(4):
207
- output[idx] = wptype(2) * v4result[i]
208
- idx = idx + 1
209
- for i in range(5):
210
- output[idx] = wptype(2) * v5result[i]
211
- idx = idx + 1
212
-
213
- def check_component_init(
214
- input: wp.array(dtype=wptype),
215
- output: wp.array(dtype=wptype),
216
- ):
217
- v2result = wp.vector(input[0], input[1])
218
- v3result = wp.vector(input[2], input[3], input[4])
219
- v4result = wp.vector(input[5], input[6], input[7], input[8])
220
- v5result = wp.vector(input[9], input[10], input[11], input[12], input[13])
221
-
222
- idx = 0
223
- for i in range(2):
224
- output[idx] = wptype(2) * v2result[i]
225
- idx = idx + 1
226
- for i in range(3):
227
- output[idx] = wptype(2) * v3result[i]
228
- idx = idx + 1
229
- for i in range(4):
230
- output[idx] = wptype(2) * v4result[i]
231
- idx = idx + 1
232
- for i in range(5):
233
- output[idx] = wptype(2) * v5result[i]
234
- idx = idx + 1
235
-
236
- scalar_kernel = getkernel(check_scalar_init, suffix=dtype.__name__)
237
- component_kernel = getkernel(check_component_init, suffix=dtype.__name__)
238
- output_select_kernel = get_select_kernel(wptype)
239
-
240
- if register_kernels:
241
- return
242
-
243
- input = wp.array(randvals(rng, [4], dtype), requires_grad=True, device=device)
244
- output = wp.zeros(2 + 3 + 4 + 5, dtype=wptype, requires_grad=True, device=device)
245
-
246
- wp.launch(scalar_kernel, dim=1, inputs=[input], outputs=[output], device=device)
247
-
248
- assert_np_equal(output.numpy()[:2], 2 * np.array([input.numpy()[0]] * 2), tol=1.0e-6)
249
- assert_np_equal(output.numpy()[2:5], 2 * np.array([input.numpy()[1]] * 3), tol=1.0e-6)
250
- assert_np_equal(output.numpy()[5:9], 2 * np.array([input.numpy()[2]] * 4), tol=1.0e-6)
251
- assert_np_equal(output.numpy()[9:], 2 * np.array([input.numpy()[3]] * 5), tol=1.0e-6)
252
-
253
- if dtype in np_float_types:
254
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
255
- for i in range(len(output)):
256
- tape = wp.Tape()
257
- with tape:
258
- wp.launch(scalar_kernel, dim=1, inputs=[input], outputs=[output], device=device)
259
- wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[out], device=device)
260
-
261
- tape.backward(loss=out)
262
- expected = np.zeros_like(input.numpy())
263
- if i < 2:
264
- expected[0] = 2
265
- elif i < 5:
266
- expected[1] = 2
267
- elif i < 9:
268
- expected[2] = 2
269
- else:
270
- expected[3] = 2
271
-
272
- assert_np_equal(tape.gradients[input].numpy(), expected, tol=tol)
273
-
274
- tape.reset()
275
- tape.zero()
276
-
277
- input = wp.array(randvals(rng, [2 + 3 + 4 + 5], dtype), requires_grad=True, device=device)
278
- output = wp.zeros(2 + 3 + 4 + 5, dtype=wptype, requires_grad=True, device=device)
279
-
280
- wp.launch(component_kernel, dim=1, inputs=[input], outputs=[output], device=device)
281
-
282
- assert_np_equal(output.numpy(), 2 * input.numpy(), tol=1.0e-6)
283
-
284
- if dtype in np_float_types:
285
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
286
- for i in range(len(output)):
287
- tape = wp.Tape()
288
- with tape:
289
- wp.launch(component_kernel, dim=1, inputs=[input], outputs=[output], device=device)
290
- wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[out], device=device)
291
-
292
- tape.backward(loss=out)
293
- expected = np.zeros_like(input.numpy())
294
- expected[i] = 2
295
-
296
- assert_np_equal(tape.gradients[input].numpy(), expected, tol=tol)
297
-
298
- tape.reset()
299
- tape.zero()
300
-
301
-
302
- def test_constants(test, device, dtype, register_kernels=False):
303
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
304
- vec2 = wp.types.vector(length=2, dtype=wptype)
305
- vec3 = wp.types.vector(length=3, dtype=wptype)
306
- vec4 = wp.types.vector(length=4, dtype=wptype)
307
- vec5 = wp.types.vector(length=5, dtype=wptype)
308
-
309
- cv2 = wp.constant(vec2(1, 2))
310
- cv3 = wp.constant(vec3(1, 2, 3))
311
- cv4 = wp.constant(vec4(1, 2, 3, 4))
312
- cv5 = wp.constant(vec5(1, 2, 3, 4, 5))
313
-
314
- def check_vector_constants():
315
- wp.expect_eq(cv2, vec2(wptype(1), wptype(2)))
316
- wp.expect_eq(cv3, vec3(wptype(1), wptype(2), wptype(3)))
317
- wp.expect_eq(cv4, vec4(wptype(1), wptype(2), wptype(3), wptype(4)))
318
- wp.expect_eq(cv5, vec5(wptype(1), wptype(2), wptype(3), wptype(4), wptype(5)))
319
-
320
- kernel = getkernel(check_vector_constants, suffix=dtype.__name__)
321
-
322
- if register_kernels:
323
- return
324
-
325
- wp.launch(kernel, dim=1, inputs=[])
326
-
327
-
328
- def test_constructors(test, device, dtype, register_kernels=False):
329
- rng = np.random.default_rng(123)
330
-
331
- tol = {
332
- np.float16: 5.0e-3,
333
- np.float32: 1.0e-6,
334
- np.float64: 1.0e-8,
335
- }.get(dtype, 0)
336
-
337
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
338
- vec2 = wp.types.vector(length=2, dtype=wptype)
339
- vec3 = wp.types.vector(length=3, dtype=wptype)
340
- vec4 = wp.types.vector(length=4, dtype=wptype)
341
- vec5 = wp.types.vector(length=5, dtype=wptype)
342
-
343
- def check_scalar_constructor(
344
- input: wp.array(dtype=wptype),
345
- v2: wp.array(dtype=vec2),
346
- v3: wp.array(dtype=vec3),
347
- v4: wp.array(dtype=vec4),
348
- v5: wp.array(dtype=vec5),
349
- v20: wp.array(dtype=wptype),
350
- v21: wp.array(dtype=wptype),
351
- v30: wp.array(dtype=wptype),
352
- v31: wp.array(dtype=wptype),
353
- v32: wp.array(dtype=wptype),
354
- v40: wp.array(dtype=wptype),
355
- v41: wp.array(dtype=wptype),
356
- v42: wp.array(dtype=wptype),
357
- v43: wp.array(dtype=wptype),
358
- v50: wp.array(dtype=wptype),
359
- v51: wp.array(dtype=wptype),
360
- v52: wp.array(dtype=wptype),
361
- v53: wp.array(dtype=wptype),
362
- v54: wp.array(dtype=wptype),
363
- ):
364
- v2result = vec2(input[0])
365
- v3result = vec3(input[0])
366
- v4result = vec4(input[0])
367
- v5result = vec5(input[0])
368
-
369
- v2[0] = v2result
370
- v3[0] = v3result
371
- v4[0] = v4result
372
- v5[0] = v5result
373
-
374
- # multiply outputs by 2 so we've got something to backpropagate
375
- v20[0] = wptype(2) * v2result[0]
376
- v21[0] = wptype(2) * v2result[1]
377
-
378
- v30[0] = wptype(2) * v3result[0]
379
- v31[0] = wptype(2) * v3result[1]
380
- v32[0] = wptype(2) * v3result[2]
381
-
382
- v40[0] = wptype(2) * v4result[0]
383
- v41[0] = wptype(2) * v4result[1]
384
- v42[0] = wptype(2) * v4result[2]
385
- v43[0] = wptype(2) * v4result[3]
386
-
387
- v50[0] = wptype(2) * v5result[0]
388
- v51[0] = wptype(2) * v5result[1]
389
- v52[0] = wptype(2) * v5result[2]
390
- v53[0] = wptype(2) * v5result[3]
391
- v54[0] = wptype(2) * v5result[4]
392
-
393
- def check_vector_constructors(
394
- input: wp.array(dtype=wptype),
395
- v2: wp.array(dtype=vec2),
396
- v3: wp.array(dtype=vec3),
397
- v4: wp.array(dtype=vec4),
398
- v5: wp.array(dtype=vec5),
399
- v20: wp.array(dtype=wptype),
400
- v21: wp.array(dtype=wptype),
401
- v30: wp.array(dtype=wptype),
402
- v31: wp.array(dtype=wptype),
403
- v32: wp.array(dtype=wptype),
404
- v40: wp.array(dtype=wptype),
405
- v41: wp.array(dtype=wptype),
406
- v42: wp.array(dtype=wptype),
407
- v43: wp.array(dtype=wptype),
408
- v50: wp.array(dtype=wptype),
409
- v51: wp.array(dtype=wptype),
410
- v52: wp.array(dtype=wptype),
411
- v53: wp.array(dtype=wptype),
412
- v54: wp.array(dtype=wptype),
413
- ):
414
- v2result = vec2(input[0], input[1])
415
- v3result = vec3(input[2], input[3], input[4])
416
- v4result = vec4(input[5], input[6], input[7], input[8])
417
- v5result = vec5(input[9], input[10], input[11], input[12], input[13])
418
-
419
- v2[0] = v2result
420
- v3[0] = v3result
421
- v4[0] = v4result
422
- v5[0] = v5result
423
-
424
- # multiply the output by 2 so we've got something to backpropagate:
425
- v20[0] = wptype(2) * v2result[0]
426
- v21[0] = wptype(2) * v2result[1]
427
-
428
- v30[0] = wptype(2) * v3result[0]
429
- v31[0] = wptype(2) * v3result[1]
430
- v32[0] = wptype(2) * v3result[2]
431
-
432
- v40[0] = wptype(2) * v4result[0]
433
- v41[0] = wptype(2) * v4result[1]
434
- v42[0] = wptype(2) * v4result[2]
435
- v43[0] = wptype(2) * v4result[3]
436
-
437
- v50[0] = wptype(2) * v5result[0]
438
- v51[0] = wptype(2) * v5result[1]
439
- v52[0] = wptype(2) * v5result[2]
440
- v53[0] = wptype(2) * v5result[3]
441
- v54[0] = wptype(2) * v5result[4]
442
-
443
- vec_kernel = getkernel(check_vector_constructors, suffix=dtype.__name__)
444
- kernel = getkernel(check_scalar_constructor, suffix=dtype.__name__)
445
-
446
- if register_kernels:
447
- return
448
-
449
- input = wp.array(randvals(rng, [1], dtype), requires_grad=True, device=device)
450
- v2 = wp.zeros(1, dtype=vec2, device=device)
451
- v3 = wp.zeros(1, dtype=vec3, device=device)
452
- v4 = wp.zeros(1, dtype=vec4, device=device)
453
- v5 = wp.zeros(1, dtype=vec5, device=device)
454
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
455
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
456
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
457
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
458
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
459
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
460
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
461
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
462
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
463
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
464
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
465
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
466
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
467
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
468
-
469
- tape = wp.Tape()
470
- with tape:
471
- wp.launch(
472
- kernel,
473
- dim=1,
474
- inputs=[input],
475
- outputs=[v2, v3, v4, v5, v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
476
- device=device,
477
- )
478
-
479
- if dtype in np_float_types:
480
- for l in [v20, v21]:
481
- tape.backward(loss=l)
482
- test.assertEqual(tape.gradients[input].numpy()[0], 2.0)
483
- tape.zero()
484
-
485
- for l in [v30, v31, v32]:
486
- tape.backward(loss=l)
487
- test.assertEqual(tape.gradients[input].numpy()[0], 2.0)
488
- tape.zero()
489
-
490
- for l in [v40, v41, v42, v43]:
491
- tape.backward(loss=l)
492
- test.assertEqual(tape.gradients[input].numpy()[0], 2.0)
493
- tape.zero()
494
-
495
- for l in [v50, v51, v52, v53, v54]:
496
- tape.backward(loss=l)
497
- test.assertEqual(tape.gradients[input].numpy()[0], 2.0)
498
- tape.zero()
499
-
500
- val = input.numpy()[0]
501
- assert_np_equal(v2.numpy()[0], np.array([val, val]), tol=1.0e-6)
502
- assert_np_equal(v3.numpy()[0], np.array([val, val, val]), tol=1.0e-6)
503
- assert_np_equal(v4.numpy()[0], np.array([val, val, val, val]), tol=1.0e-6)
504
- assert_np_equal(v5.numpy()[0], np.array([val, val, val, val, val]), tol=1.0e-6)
505
-
506
- assert_np_equal(v20.numpy()[0], 2 * val, tol=1.0e-6)
507
- assert_np_equal(v21.numpy()[0], 2 * val, tol=1.0e-6)
508
- assert_np_equal(v30.numpy()[0], 2 * val, tol=1.0e-6)
509
- assert_np_equal(v31.numpy()[0], 2 * val, tol=1.0e-6)
510
- assert_np_equal(v32.numpy()[0], 2 * val, tol=1.0e-6)
511
- assert_np_equal(v40.numpy()[0], 2 * val, tol=1.0e-6)
512
- assert_np_equal(v41.numpy()[0], 2 * val, tol=1.0e-6)
513
- assert_np_equal(v42.numpy()[0], 2 * val, tol=1.0e-6)
514
- assert_np_equal(v43.numpy()[0], 2 * val, tol=1.0e-6)
515
- assert_np_equal(v50.numpy()[0], 2 * val, tol=1.0e-6)
516
- assert_np_equal(v51.numpy()[0], 2 * val, tol=1.0e-6)
517
- assert_np_equal(v52.numpy()[0], 2 * val, tol=1.0e-6)
518
- assert_np_equal(v53.numpy()[0], 2 * val, tol=1.0e-6)
519
- assert_np_equal(v54.numpy()[0], 2 * val, tol=1.0e-6)
520
-
521
- input = wp.array(randvals(rng, [14], dtype), requires_grad=True, device=device)
522
- tape = wp.Tape()
523
- with tape:
524
- wp.launch(
525
- vec_kernel,
526
- dim=1,
527
- inputs=[input],
528
- outputs=[v2, v3, v4, v5, v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
529
- device=device,
530
- )
531
-
532
- if dtype in np_float_types:
533
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
534
- tape.backward(loss=l)
535
- grad = tape.gradients[input].numpy()
536
- expected_grad = np.zeros_like(grad)
537
- expected_grad[i] = 2
538
- assert_np_equal(grad, expected_grad, tol=tol)
539
- tape.zero()
540
-
541
- assert_np_equal(v2.numpy()[0, 0], input.numpy()[0], tol=tol)
542
- assert_np_equal(v2.numpy()[0, 1], input.numpy()[1], tol=tol)
543
- assert_np_equal(v3.numpy()[0, 0], input.numpy()[2], tol=tol)
544
- assert_np_equal(v3.numpy()[0, 1], input.numpy()[3], tol=tol)
545
- assert_np_equal(v3.numpy()[0, 2], input.numpy()[4], tol=tol)
546
- assert_np_equal(v4.numpy()[0, 0], input.numpy()[5], tol=tol)
547
- assert_np_equal(v4.numpy()[0, 1], input.numpy()[6], tol=tol)
548
- assert_np_equal(v4.numpy()[0, 2], input.numpy()[7], tol=tol)
549
- assert_np_equal(v4.numpy()[0, 3], input.numpy()[8], tol=tol)
550
- assert_np_equal(v5.numpy()[0, 0], input.numpy()[9], tol=tol)
551
- assert_np_equal(v5.numpy()[0, 1], input.numpy()[10], tol=tol)
552
- assert_np_equal(v5.numpy()[0, 2], input.numpy()[11], tol=tol)
553
- assert_np_equal(v5.numpy()[0, 3], input.numpy()[12], tol=tol)
554
- assert_np_equal(v5.numpy()[0, 4], input.numpy()[13], tol=tol)
555
-
556
- assert_np_equal(v20.numpy()[0], 2 * input.numpy()[0], tol=tol)
557
- assert_np_equal(v21.numpy()[0], 2 * input.numpy()[1], tol=tol)
558
- assert_np_equal(v30.numpy()[0], 2 * input.numpy()[2], tol=tol)
559
- assert_np_equal(v31.numpy()[0], 2 * input.numpy()[3], tol=tol)
560
- assert_np_equal(v32.numpy()[0], 2 * input.numpy()[4], tol=tol)
561
- assert_np_equal(v40.numpy()[0], 2 * input.numpy()[5], tol=tol)
562
- assert_np_equal(v41.numpy()[0], 2 * input.numpy()[6], tol=tol)
563
- assert_np_equal(v42.numpy()[0], 2 * input.numpy()[7], tol=tol)
564
- assert_np_equal(v43.numpy()[0], 2 * input.numpy()[8], tol=tol)
565
- assert_np_equal(v50.numpy()[0], 2 * input.numpy()[9], tol=tol)
566
- assert_np_equal(v51.numpy()[0], 2 * input.numpy()[10], tol=tol)
567
- assert_np_equal(v52.numpy()[0], 2 * input.numpy()[11], tol=tol)
568
- assert_np_equal(v53.numpy()[0], 2 * input.numpy()[12], tol=tol)
569
- assert_np_equal(v54.numpy()[0], 2 * input.numpy()[13], tol=tol)
570
-
571
-
572
54
  def test_anon_constructor_error_dtype_keyword_missing(test, device):
573
55
  @wp.kernel
574
56
  def kernel():
@@ -710,1093 +192,21 @@ def test_tpl_constructor_error_numeric_args_mismatch(test, device):
710
192
  )
711
193
 
712
194
 
713
- def test_tpl_ops_with_anon(test, device):
714
- vec3i = wp.vec(3, dtype=int)
715
-
716
- v = wp.vec3i(1, 2, 3)
717
- v += vec3i(2, 3, 4)
718
- v -= vec3i(3, 4, 5)
719
- test.assertSequenceEqual(v, (0, 1, 2))
720
-
721
- v = vec3i(1, 2, 3)
722
- v += wp.vec3i(2, 3, 4)
723
- v -= wp.vec3i(3, 4, 5)
724
- test.assertSequenceEqual(v, (0, 1, 2))
725
-
726
-
727
- def test_indexing(test, device, dtype, register_kernels=False):
728
- rng = np.random.default_rng(123)
729
-
730
- tol = {
731
- np.float16: 5.0e-3,
732
- np.float32: 1.0e-6,
733
- np.float64: 1.0e-8,
734
- }.get(dtype, 0)
735
-
736
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
737
- vec2 = wp.types.vector(length=2, dtype=wptype)
738
- vec3 = wp.types.vector(length=3, dtype=wptype)
739
- vec4 = wp.types.vector(length=4, dtype=wptype)
740
- vec5 = wp.types.vector(length=5, dtype=wptype)
741
-
742
- def check_indexing(
743
- v2: wp.array(dtype=vec2),
744
- v3: wp.array(dtype=vec3),
745
- v4: wp.array(dtype=vec4),
746
- v5: wp.array(dtype=vec5),
747
- v20: wp.array(dtype=wptype),
748
- v21: wp.array(dtype=wptype),
749
- v30: wp.array(dtype=wptype),
750
- v31: wp.array(dtype=wptype),
751
- v32: wp.array(dtype=wptype),
752
- v40: wp.array(dtype=wptype),
753
- v41: wp.array(dtype=wptype),
754
- v42: wp.array(dtype=wptype),
755
- v43: wp.array(dtype=wptype),
756
- v50: wp.array(dtype=wptype),
757
- v51: wp.array(dtype=wptype),
758
- v52: wp.array(dtype=wptype),
759
- v53: wp.array(dtype=wptype),
760
- v54: wp.array(dtype=wptype),
761
- ):
762
- # multiply outputs by 2 so we've got something to backpropagate:
763
- v20[0] = wptype(2) * v2[0][0]
764
- v21[0] = wptype(2) * v2[0][1]
765
-
766
- v30[0] = wptype(2) * v3[0][0]
767
- v31[0] = wptype(2) * v3[0][1]
768
- v32[0] = wptype(2) * v3[0][2]
769
-
770
- v40[0] = wptype(2) * v4[0][0]
771
- v41[0] = wptype(2) * v4[0][1]
772
- v42[0] = wptype(2) * v4[0][2]
773
- v43[0] = wptype(2) * v4[0][3]
774
-
775
- v50[0] = wptype(2) * v5[0][0]
776
- v51[0] = wptype(2) * v5[0][1]
777
- v52[0] = wptype(2) * v5[0][2]
778
- v53[0] = wptype(2) * v5[0][3]
779
- v54[0] = wptype(2) * v5[0][4]
780
-
781
- kernel = getkernel(check_indexing, suffix=dtype.__name__)
782
-
783
- if register_kernels:
784
- return
785
-
786
- v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
787
- v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
788
- v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
789
- v5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
790
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
791
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
792
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
793
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
794
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
795
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
796
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
797
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
798
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
799
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
800
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
801
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
802
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
803
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
804
-
805
- tape = wp.Tape()
806
- with tape:
807
- wp.launch(
808
- kernel,
809
- dim=1,
810
- inputs=[v2, v3, v4, v5],
811
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
812
- device=device,
813
- )
814
-
815
- if dtype in np_float_types:
816
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
817
- tape.backward(loss=l)
818
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
819
- expected_grads = np.zeros_like(allgrads)
820
- expected_grads[i] = 2
821
- assert_np_equal(allgrads, expected_grads, tol=tol)
822
- tape.zero()
823
-
824
- assert_np_equal(v20.numpy()[0], 2.0 * v2.numpy()[0, 0], tol=tol)
825
- assert_np_equal(v21.numpy()[0], 2.0 * v2.numpy()[0, 1], tol=tol)
826
- assert_np_equal(v30.numpy()[0], 2.0 * v3.numpy()[0, 0], tol=tol)
827
- assert_np_equal(v31.numpy()[0], 2.0 * v3.numpy()[0, 1], tol=tol)
828
- assert_np_equal(v32.numpy()[0], 2.0 * v3.numpy()[0, 2], tol=tol)
829
- assert_np_equal(v40.numpy()[0], 2.0 * v4.numpy()[0, 0], tol=tol)
830
- assert_np_equal(v41.numpy()[0], 2.0 * v4.numpy()[0, 1], tol=tol)
831
- assert_np_equal(v42.numpy()[0], 2.0 * v4.numpy()[0, 2], tol=tol)
832
- assert_np_equal(v43.numpy()[0], 2.0 * v4.numpy()[0, 3], tol=tol)
833
- assert_np_equal(v50.numpy()[0], 2.0 * v5.numpy()[0, 0], tol=tol)
834
- assert_np_equal(v51.numpy()[0], 2.0 * v5.numpy()[0, 1], tol=tol)
835
- assert_np_equal(v52.numpy()[0], 2.0 * v5.numpy()[0, 2], tol=tol)
836
- assert_np_equal(v53.numpy()[0], 2.0 * v5.numpy()[0, 3], tol=tol)
837
- assert_np_equal(v54.numpy()[0], 2.0 * v5.numpy()[0, 4], tol=tol)
838
-
839
-
840
- def test_equality(test, device, dtype, register_kernels=False):
841
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
842
- vec2 = wp.types.vector(length=2, dtype=wptype)
843
- vec3 = wp.types.vector(length=3, dtype=wptype)
844
- vec4 = wp.types.vector(length=4, dtype=wptype)
845
- vec5 = wp.types.vector(length=5, dtype=wptype)
846
-
847
- def check_equality(
848
- v20: wp.array(dtype=vec2),
849
- v21: wp.array(dtype=vec2),
850
- v22: wp.array(dtype=vec2),
851
- v30: wp.array(dtype=vec3),
852
- v31: wp.array(dtype=vec3),
853
- v32: wp.array(dtype=vec3),
854
- v33: wp.array(dtype=vec3),
855
- v40: wp.array(dtype=vec4),
856
- v41: wp.array(dtype=vec4),
857
- v42: wp.array(dtype=vec4),
858
- v43: wp.array(dtype=vec4),
859
- v44: wp.array(dtype=vec4),
860
- v50: wp.array(dtype=vec5),
861
- v51: wp.array(dtype=vec5),
862
- v52: wp.array(dtype=vec5),
863
- v53: wp.array(dtype=vec5),
864
- v54: wp.array(dtype=vec5),
865
- v55: wp.array(dtype=vec5),
866
- ):
867
- wp.expect_eq(v20[0], v20[0])
868
- wp.expect_neq(v21[0], v20[0])
869
- wp.expect_neq(v22[0], v20[0])
870
-
871
- wp.expect_eq(v30[0], v30[0])
872
- wp.expect_neq(v31[0], v30[0])
873
- wp.expect_neq(v32[0], v30[0])
874
- wp.expect_neq(v33[0], v30[0])
875
-
876
- wp.expect_eq(v40[0], v40[0])
877
- wp.expect_neq(v41[0], v40[0])
878
- wp.expect_neq(v42[0], v40[0])
879
- wp.expect_neq(v43[0], v40[0])
880
- wp.expect_neq(v44[0], v40[0])
881
-
882
- wp.expect_eq(v50[0], v50[0])
883
- wp.expect_neq(v51[0], v50[0])
884
- wp.expect_neq(v52[0], v50[0])
885
- wp.expect_neq(v53[0], v50[0])
886
- wp.expect_neq(v54[0], v50[0])
887
- wp.expect_neq(v55[0], v50[0])
888
-
889
- kernel = getkernel(check_equality, suffix=dtype.__name__)
890
-
891
- if register_kernels:
892
- return
893
-
894
- v20 = wp.array([1.0, 2.0], dtype=vec2, requires_grad=True, device=device)
895
- v21 = wp.array([1.0, 3.0], dtype=vec2, requires_grad=True, device=device)
896
- v22 = wp.array([3.0, 2.0], dtype=vec2, requires_grad=True, device=device)
897
-
898
- v30 = wp.array([1.0, 2.0, 3.0], dtype=vec3, requires_grad=True, device=device)
899
- v31 = wp.array([-1.0, 2.0, 3.0], dtype=vec3, requires_grad=True, device=device)
900
- v32 = wp.array([1.0, -2.0, 3.0], dtype=vec3, requires_grad=True, device=device)
901
- v33 = wp.array([1.0, 2.0, -3.0], dtype=vec3, requires_grad=True, device=device)
902
-
903
- v40 = wp.array([1.0, 2.0, 3.0, 4.0], dtype=vec4, requires_grad=True, device=device)
904
- v41 = wp.array([-1.0, 2.0, 3.0, 4.0], dtype=vec4, requires_grad=True, device=device)
905
- v42 = wp.array([1.0, -2.0, 3.0, 4.0], dtype=vec4, requires_grad=True, device=device)
906
- v43 = wp.array([1.0, 2.0, -3.0, 4.0], dtype=vec4, requires_grad=True, device=device)
907
- v44 = wp.array([1.0, 2.0, 3.0, -4.0], dtype=vec4, requires_grad=True, device=device)
908
-
909
- v50 = wp.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype=vec5, requires_grad=True, device=device)
910
- v51 = wp.array([-1.0, 2.0, 3.0, 4.0, 5.0], dtype=vec5, requires_grad=True, device=device)
911
- v52 = wp.array([1.0, -2.0, 3.0, 4.0, 5.0], dtype=vec5, requires_grad=True, device=device)
912
- v53 = wp.array([1.0, 2.0, -3.0, 4.0, 5.0], dtype=vec5, requires_grad=True, device=device)
913
- v54 = wp.array([1.0, 2.0, 3.0, -4.0, 5.0], dtype=vec5, requires_grad=True, device=device)
914
- v55 = wp.array([1.0, 2.0, 3.0, 4.0, -5.0], dtype=vec5, requires_grad=True, device=device)
915
- wp.launch(
916
- kernel,
917
- dim=1,
918
- inputs=[
919
- v20,
920
- v21,
921
- v22,
922
- v30,
923
- v31,
924
- v32,
925
- v33,
926
- v40,
927
- v41,
928
- v42,
929
- v43,
930
- v44,
931
- v50,
932
- v51,
933
- v52,
934
- v53,
935
- v54,
936
- v55,
937
- ],
938
- outputs=[],
939
- device=device,
940
- )
941
-
942
-
943
- def test_negation(test, device, dtype, register_kernels=False):
944
- rng = np.random.default_rng(123)
945
-
946
- tol = {
947
- np.float16: 5.0e-3,
948
- np.float32: 1.0e-6,
949
- np.float64: 1.0e-8,
950
- }.get(dtype, 0)
951
-
952
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
953
- vec2 = wp.types.vector(length=2, dtype=wptype)
954
- vec3 = wp.types.vector(length=3, dtype=wptype)
955
- vec4 = wp.types.vector(length=4, dtype=wptype)
956
- vec5 = wp.types.vector(length=5, dtype=wptype)
957
-
958
- def check_negation(
959
- v2: wp.array(dtype=vec2),
960
- v3: wp.array(dtype=vec3),
961
- v4: wp.array(dtype=vec4),
962
- v5: wp.array(dtype=vec5),
963
- v2out: wp.array(dtype=vec2),
964
- v3out: wp.array(dtype=vec3),
965
- v4out: wp.array(dtype=vec4),
966
- v5out: wp.array(dtype=vec5),
967
- v20: wp.array(dtype=wptype),
968
- v21: wp.array(dtype=wptype),
969
- v30: wp.array(dtype=wptype),
970
- v31: wp.array(dtype=wptype),
971
- v32: wp.array(dtype=wptype),
972
- v40: wp.array(dtype=wptype),
973
- v41: wp.array(dtype=wptype),
974
- v42: wp.array(dtype=wptype),
975
- v43: wp.array(dtype=wptype),
976
- v50: wp.array(dtype=wptype),
977
- v51: wp.array(dtype=wptype),
978
- v52: wp.array(dtype=wptype),
979
- v53: wp.array(dtype=wptype),
980
- v54: wp.array(dtype=wptype),
981
- ):
982
- v2result = -v2[0]
983
- v3result = -v3[0]
984
- v4result = -v4[0]
985
- v5result = -v5[0]
986
-
987
- v2out[0] = v2result
988
- v3out[0] = v3result
989
- v4out[0] = v4result
990
- v5out[0] = v5result
991
-
992
- # multiply these outputs by 2 so we've got something to backpropagate:
993
- v20[0] = wptype(2) * v2result[0]
994
- v21[0] = wptype(2) * v2result[1]
995
-
996
- v30[0] = wptype(2) * v3result[0]
997
- v31[0] = wptype(2) * v3result[1]
998
- v32[0] = wptype(2) * v3result[2]
999
-
1000
- v40[0] = wptype(2) * v4result[0]
1001
- v41[0] = wptype(2) * v4result[1]
1002
- v42[0] = wptype(2) * v4result[2]
1003
- v43[0] = wptype(2) * v4result[3]
1004
-
1005
- v50[0] = wptype(2) * v5result[0]
1006
- v51[0] = wptype(2) * v5result[1]
1007
- v52[0] = wptype(2) * v5result[2]
1008
- v53[0] = wptype(2) * v5result[3]
1009
- v54[0] = wptype(2) * v5result[4]
1010
-
1011
- kernel = getkernel(check_negation, suffix=dtype.__name__)
1012
-
1013
- if register_kernels:
1014
- return
1015
-
1016
- v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1017
- v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1018
- v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1019
- v5_np = randvals(rng, (1, 5), dtype)
1020
- v5 = wp.array(v5_np, dtype=vec5, requires_grad=True, device=device)
1021
-
1022
- v2out = wp.zeros(1, dtype=vec2, device=device)
1023
- v3out = wp.zeros(1, dtype=vec3, device=device)
1024
- v4out = wp.zeros(1, dtype=vec4, device=device)
1025
- v5out = wp.zeros(1, dtype=vec5, device=device)
1026
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1027
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1028
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1029
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1030
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1031
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1032
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1033
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1034
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1035
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1036
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1037
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1038
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1039
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1040
-
1041
- tape = wp.Tape()
1042
- with tape:
1043
- wp.launch(
1044
- kernel,
1045
- dim=1,
1046
- inputs=[v2, v3, v4, v5],
1047
- outputs=[v2out, v3out, v4out, v5out, v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
1048
- device=device,
1049
- )
1050
-
1051
- if dtype in np_float_types:
1052
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
1053
- tape.backward(loss=l)
1054
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
1055
- expected_grads = np.zeros_like(allgrads)
1056
- expected_grads[i] = -2
1057
- assert_np_equal(allgrads, expected_grads, tol=tol)
1058
- tape.zero()
1059
-
1060
- assert_np_equal(v2out.numpy()[0], -v2.numpy()[0], tol=tol)
1061
- assert_np_equal(v3out.numpy()[0], -v3.numpy()[0], tol=tol)
1062
- assert_np_equal(v4out.numpy()[0], -v4.numpy()[0], tol=tol)
1063
- assert_np_equal(v5out.numpy()[0], -v5.numpy()[0], tol=tol)
1064
-
1065
-
1066
- def test_scalar_multiplication(test, device, dtype, register_kernels=False):
1067
- rng = np.random.default_rng(123)
1068
-
1069
- tol = {
1070
- np.float16: 5.0e-3,
1071
- np.float32: 1.0e-6,
1072
- np.float64: 1.0e-8,
1073
- }.get(dtype, 0)
1074
-
1075
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1076
- vec2 = wp.types.vector(length=2, dtype=wptype)
1077
- vec3 = wp.types.vector(length=3, dtype=wptype)
1078
- vec4 = wp.types.vector(length=4, dtype=wptype)
1079
- vec5 = wp.types.vector(length=5, dtype=wptype)
1080
-
1081
- def check_mul(
1082
- s: wp.array(dtype=wptype),
1083
- v2: wp.array(dtype=vec2),
1084
- v3: wp.array(dtype=vec3),
1085
- v4: wp.array(dtype=vec4),
1086
- v5: wp.array(dtype=vec5),
1087
- v20: wp.array(dtype=wptype),
1088
- v21: wp.array(dtype=wptype),
1089
- v30: wp.array(dtype=wptype),
1090
- v31: wp.array(dtype=wptype),
1091
- v32: wp.array(dtype=wptype),
1092
- v40: wp.array(dtype=wptype),
1093
- v41: wp.array(dtype=wptype),
1094
- v42: wp.array(dtype=wptype),
1095
- v43: wp.array(dtype=wptype),
1096
- v50: wp.array(dtype=wptype),
1097
- v51: wp.array(dtype=wptype),
1098
- v52: wp.array(dtype=wptype),
1099
- v53: wp.array(dtype=wptype),
1100
- v54: wp.array(dtype=wptype),
1101
- ):
1102
- v2result = s[0] * v2[0]
1103
- v3result = s[0] * v3[0]
1104
- v4result = s[0] * v4[0]
1105
- v5result = s[0] * v5[0]
1106
-
1107
- # multiply outputs by 2 so we've got something to backpropagate:
1108
- v20[0] = wptype(2) * v2result[0]
1109
- v21[0] = wptype(2) * v2result[1]
1110
-
1111
- v30[0] = wptype(2) * v3result[0]
1112
- v31[0] = wptype(2) * v3result[1]
1113
- v32[0] = wptype(2) * v3result[2]
1114
-
1115
- v40[0] = wptype(2) * v4result[0]
1116
- v41[0] = wptype(2) * v4result[1]
1117
- v42[0] = wptype(2) * v4result[2]
1118
- v43[0] = wptype(2) * v4result[3]
1119
-
1120
- v50[0] = wptype(2) * v5result[0]
1121
- v51[0] = wptype(2) * v5result[1]
1122
- v52[0] = wptype(2) * v5result[2]
1123
- v53[0] = wptype(2) * v5result[3]
1124
- v54[0] = wptype(2) * v5result[4]
1125
-
1126
- kernel = getkernel(check_mul, suffix=dtype.__name__)
1127
-
1128
- if register_kernels:
1129
- return
1130
-
1131
- s = wp.array(randvals(rng, [1], dtype), requires_grad=True, device=device)
1132
- v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1133
- v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1134
- v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1135
- v5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1136
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1137
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1138
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1139
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1140
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1141
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1142
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1143
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1144
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1145
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1146
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1147
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1148
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1149
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1150
- tape = wp.Tape()
1151
- with tape:
1152
- wp.launch(
1153
- kernel,
1154
- dim=1,
1155
- inputs=[
1156
- s,
1157
- v2,
1158
- v3,
1159
- v4,
1160
- v5,
1161
- ],
1162
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
1163
- device=device,
1164
- )
1165
-
1166
- assert_np_equal(v20.numpy()[0], 2 * s.numpy()[0] * v2.numpy()[0, 0], tol=tol)
1167
- assert_np_equal(v21.numpy()[0], 2 * s.numpy()[0] * v2.numpy()[0, 1], tol=tol)
1168
-
1169
- assert_np_equal(v30.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 0], tol=10 * tol)
1170
- assert_np_equal(v31.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 1], tol=10 * tol)
1171
- assert_np_equal(v32.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 2], tol=10 * tol)
1172
-
1173
- assert_np_equal(v40.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 0], tol=10 * tol)
1174
- assert_np_equal(v41.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 1], tol=10 * tol)
1175
- assert_np_equal(v42.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 2], tol=10 * tol)
1176
- assert_np_equal(v43.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 3], tol=10 * tol)
1177
-
1178
- assert_np_equal(v50.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 0], tol=10 * tol)
1179
- assert_np_equal(v51.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 1], tol=10 * tol)
1180
- assert_np_equal(v52.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 2], tol=10 * tol)
1181
- assert_np_equal(v53.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 3], tol=10 * tol)
1182
- assert_np_equal(v54.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 4], tol=10 * tol)
1183
-
1184
- incmps = np.concatenate([v.numpy()[0] for v in [v2, v3, v4, v5]])
1185
-
1186
- if dtype in np_float_types:
1187
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43]):
1188
- tape.backward(loss=l)
1189
- sgrad = tape.gradients[s].numpy()[0]
1190
- assert_np_equal(sgrad, 2 * incmps[i], tol=10 * tol)
1191
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4]])
1192
- expected_grads = np.zeros_like(allgrads)
1193
- expected_grads[i] = s.numpy()[0] * 2
1194
- assert_np_equal(allgrads, expected_grads, tol=10 * tol)
1195
- tape.zero()
1196
-
1197
-
1198
- def test_scalar_multiplication_rightmul(test, device, dtype, register_kernels=False):
1199
- rng = np.random.default_rng(123)
1200
-
1201
- tol = {
1202
- np.float16: 5.0e-3,
1203
- np.float32: 1.0e-6,
1204
- np.float64: 1.0e-8,
1205
- }.get(dtype, 0)
1206
-
1207
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1208
- vec2 = wp.types.vector(length=2, dtype=wptype)
1209
- vec3 = wp.types.vector(length=3, dtype=wptype)
1210
- vec4 = wp.types.vector(length=4, dtype=wptype)
1211
- vec5 = wp.types.vector(length=5, dtype=wptype)
1212
-
1213
- def check_rightmul(
1214
- s: wp.array(dtype=wptype),
1215
- v2: wp.array(dtype=vec2),
1216
- v3: wp.array(dtype=vec3),
1217
- v4: wp.array(dtype=vec4),
1218
- v5: wp.array(dtype=vec5),
1219
- v20: wp.array(dtype=wptype),
1220
- v21: wp.array(dtype=wptype),
1221
- v30: wp.array(dtype=wptype),
1222
- v31: wp.array(dtype=wptype),
1223
- v32: wp.array(dtype=wptype),
1224
- v40: wp.array(dtype=wptype),
1225
- v41: wp.array(dtype=wptype),
1226
- v42: wp.array(dtype=wptype),
1227
- v43: wp.array(dtype=wptype),
1228
- v50: wp.array(dtype=wptype),
1229
- v51: wp.array(dtype=wptype),
1230
- v52: wp.array(dtype=wptype),
1231
- v53: wp.array(dtype=wptype),
1232
- v54: wp.array(dtype=wptype),
1233
- ):
1234
- v2result = v2[0] * s[0]
1235
- v3result = v3[0] * s[0]
1236
- v4result = v4[0] * s[0]
1237
- v5result = v5[0] * s[0]
1238
-
1239
- # multiply outputs by 2 so we've got something to backpropagate:
1240
- v20[0] = wptype(2) * v2result[0]
1241
- v21[0] = wptype(2) * v2result[1]
1242
-
1243
- v30[0] = wptype(2) * v3result[0]
1244
- v31[0] = wptype(2) * v3result[1]
1245
- v32[0] = wptype(2) * v3result[2]
1246
-
1247
- v40[0] = wptype(2) * v4result[0]
1248
- v41[0] = wptype(2) * v4result[1]
1249
- v42[0] = wptype(2) * v4result[2]
1250
- v43[0] = wptype(2) * v4result[3]
1251
-
1252
- v50[0] = wptype(2) * v5result[0]
1253
- v51[0] = wptype(2) * v5result[1]
1254
- v52[0] = wptype(2) * v5result[2]
1255
- v53[0] = wptype(2) * v5result[3]
1256
- v54[0] = wptype(2) * v5result[4]
1257
-
1258
- kernel = getkernel(check_rightmul, suffix=dtype.__name__)
1259
-
1260
- if register_kernels:
1261
- return
1262
-
1263
- s = wp.array(randvals(rng, [1], dtype), requires_grad=True, device=device)
1264
- v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1265
- v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1266
- v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1267
- v5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1268
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1269
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1270
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1271
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1272
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1273
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1274
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1275
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1276
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1277
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1278
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1279
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1280
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1281
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1282
- tape = wp.Tape()
1283
- with tape:
1284
- wp.launch(
1285
- kernel,
1286
- dim=1,
1287
- inputs=[
1288
- s,
1289
- v2,
1290
- v3,
1291
- v4,
1292
- v5,
1293
- ],
1294
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
1295
- device=device,
1296
- )
1297
-
1298
- assert_np_equal(v20.numpy()[0], 2 * s.numpy()[0] * v2.numpy()[0, 0], tol=tol)
1299
- assert_np_equal(v21.numpy()[0], 2 * s.numpy()[0] * v2.numpy()[0, 1], tol=tol)
1300
-
1301
- assert_np_equal(v30.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 0], tol=10 * tol)
1302
- assert_np_equal(v31.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 1], tol=10 * tol)
1303
- assert_np_equal(v32.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 2], tol=10 * tol)
1304
-
1305
- assert_np_equal(v40.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 0], tol=10 * tol)
1306
- assert_np_equal(v41.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 1], tol=10 * tol)
1307
- assert_np_equal(v42.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 2], tol=10 * tol)
1308
- assert_np_equal(v43.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 3], tol=10 * tol)
1309
-
1310
- assert_np_equal(v50.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 0], tol=10 * tol)
1311
- assert_np_equal(v51.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 1], tol=10 * tol)
1312
- assert_np_equal(v52.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 2], tol=10 * tol)
1313
- assert_np_equal(v53.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 3], tol=10 * tol)
1314
- assert_np_equal(v54.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 4], tol=10 * tol)
1315
-
1316
- incmps = np.concatenate([v.numpy()[0] for v in [v2, v3, v4, v5]])
1317
-
1318
- if dtype in np_float_types:
1319
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43]):
1320
- tape.backward(loss=l)
1321
- sgrad = tape.gradients[s].numpy()[0]
1322
- assert_np_equal(sgrad, 2 * incmps[i], tol=10 * tol)
1323
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4]])
1324
- expected_grads = np.zeros_like(allgrads)
1325
- expected_grads[i] = s.numpy()[0] * 2
1326
- assert_np_equal(allgrads, expected_grads, tol=10 * tol)
1327
- tape.zero()
1328
-
1329
-
1330
- def test_cw_multiplication(test, device, dtype, register_kernels=False):
1331
- rng = np.random.default_rng(123)
1332
-
1333
- tol = {
1334
- np.float16: 5.0e-3,
1335
- np.float32: 1.0e-6,
1336
- np.float64: 1.0e-8,
1337
- }.get(dtype, 0)
1338
-
1339
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1340
- vec2 = wp.types.vector(length=2, dtype=wptype)
1341
- vec3 = wp.types.vector(length=3, dtype=wptype)
1342
- vec4 = wp.types.vector(length=4, dtype=wptype)
1343
- vec5 = wp.types.vector(length=5, dtype=wptype)
1344
-
1345
- def check_cw_mul(
1346
- s2: wp.array(dtype=vec2),
1347
- s3: wp.array(dtype=vec3),
1348
- s4: wp.array(dtype=vec4),
1349
- s5: wp.array(dtype=vec5),
1350
- v2: wp.array(dtype=vec2),
1351
- v3: wp.array(dtype=vec3),
1352
- v4: wp.array(dtype=vec4),
1353
- v5: wp.array(dtype=vec5),
1354
- v20: wp.array(dtype=wptype),
1355
- v21: wp.array(dtype=wptype),
1356
- v30: wp.array(dtype=wptype),
1357
- v31: wp.array(dtype=wptype),
1358
- v32: wp.array(dtype=wptype),
1359
- v40: wp.array(dtype=wptype),
1360
- v41: wp.array(dtype=wptype),
1361
- v42: wp.array(dtype=wptype),
1362
- v43: wp.array(dtype=wptype),
1363
- v50: wp.array(dtype=wptype),
1364
- v51: wp.array(dtype=wptype),
1365
- v52: wp.array(dtype=wptype),
1366
- v53: wp.array(dtype=wptype),
1367
- v54: wp.array(dtype=wptype),
1368
- ):
1369
- v2result = wp.cw_mul(s2[0], v2[0])
1370
- v3result = wp.cw_mul(s3[0], v3[0])
1371
- v4result = wp.cw_mul(s4[0], v4[0])
1372
- v5result = wp.cw_mul(s5[0], v5[0])
1373
-
1374
- v20[0] = wptype(2) * v2result[0]
1375
- v21[0] = wptype(2) * v2result[1]
1376
-
1377
- v30[0] = wptype(2) * v3result[0]
1378
- v31[0] = wptype(2) * v3result[1]
1379
- v32[0] = wptype(2) * v3result[2]
1380
-
1381
- v40[0] = wptype(2) * v4result[0]
1382
- v41[0] = wptype(2) * v4result[1]
1383
- v42[0] = wptype(2) * v4result[2]
1384
- v43[0] = wptype(2) * v4result[3]
1385
-
1386
- v50[0] = wptype(2) * v5result[0]
1387
- v51[0] = wptype(2) * v5result[1]
1388
- v52[0] = wptype(2) * v5result[2]
1389
- v53[0] = wptype(2) * v5result[3]
1390
- v54[0] = wptype(2) * v5result[4]
1391
-
1392
- kernel = getkernel(check_cw_mul, suffix=dtype.__name__)
1393
-
1394
- if register_kernels:
1395
- return
1396
-
1397
- s2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1398
- s3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1399
- s4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1400
- s5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1401
- v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1402
- v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1403
- v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1404
- v5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1405
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1406
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1407
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1408
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1409
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1410
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1411
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1412
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1413
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1414
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1415
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1416
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1417
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1418
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1419
- tape = wp.Tape()
1420
- with tape:
1421
- wp.launch(
1422
- kernel,
1423
- dim=1,
1424
- inputs=[
1425
- s2,
1426
- s3,
1427
- s4,
1428
- s5,
1429
- v2,
1430
- v3,
1431
- v4,
1432
- v5,
1433
- ],
1434
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
1435
- device=device,
1436
- )
1437
-
1438
- assert_np_equal(v20.numpy()[0], 2 * s2.numpy()[0, 0] * v2.numpy()[0, 0], tol=10 * tol)
1439
- assert_np_equal(v21.numpy()[0], 2 * s2.numpy()[0, 1] * v2.numpy()[0, 1], tol=10 * tol)
1440
-
1441
- assert_np_equal(v30.numpy()[0], 2 * s3.numpy()[0, 0] * v3.numpy()[0, 0], tol=10 * tol)
1442
- assert_np_equal(v31.numpy()[0], 2 * s3.numpy()[0, 1] * v3.numpy()[0, 1], tol=10 * tol)
1443
- assert_np_equal(v32.numpy()[0], 2 * s3.numpy()[0, 2] * v3.numpy()[0, 2], tol=10 * tol)
1444
-
1445
- assert_np_equal(v40.numpy()[0], 2 * s4.numpy()[0, 0] * v4.numpy()[0, 0], tol=10 * tol)
1446
- assert_np_equal(v41.numpy()[0], 2 * s4.numpy()[0, 1] * v4.numpy()[0, 1], tol=10 * tol)
1447
- assert_np_equal(v42.numpy()[0], 2 * s4.numpy()[0, 2] * v4.numpy()[0, 2], tol=10 * tol)
1448
- assert_np_equal(v43.numpy()[0], 2 * s4.numpy()[0, 3] * v4.numpy()[0, 3], tol=10 * tol)
1449
-
1450
- assert_np_equal(v50.numpy()[0], 2 * s5.numpy()[0, 0] * v5.numpy()[0, 0], tol=10 * tol)
1451
- assert_np_equal(v51.numpy()[0], 2 * s5.numpy()[0, 1] * v5.numpy()[0, 1], tol=10 * tol)
1452
- assert_np_equal(v52.numpy()[0], 2 * s5.numpy()[0, 2] * v5.numpy()[0, 2], tol=10 * tol)
1453
- assert_np_equal(v53.numpy()[0], 2 * s5.numpy()[0, 3] * v5.numpy()[0, 3], tol=10 * tol)
1454
- assert_np_equal(v54.numpy()[0], 2 * s5.numpy()[0, 4] * v5.numpy()[0, 4], tol=10 * tol)
1455
-
1456
- incmps = np.concatenate([v.numpy()[0] for v in [v2, v3, v4, v5]])
1457
- scmps = np.concatenate([v.numpy()[0] for v in [s2, s3, s4, s5]])
1458
-
1459
- if dtype in np_float_types:
1460
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
1461
- tape.backward(loss=l)
1462
- sgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [s2, s3, s4, s5]])
1463
- expected_grads = np.zeros_like(sgrads)
1464
- expected_grads[i] = incmps[i] * 2
1465
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
1466
-
1467
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
1468
- expected_grads = np.zeros_like(allgrads)
1469
- expected_grads[i] = scmps[i] * 2
1470
- assert_np_equal(allgrads, expected_grads, tol=10 * tol)
1471
-
1472
- tape.zero()
1473
-
1474
-
1475
- def test_scalar_division(test, device, dtype, register_kernels=False):
1476
- rng = np.random.default_rng(123)
1477
-
1478
- tol = {
1479
- np.float16: 5.0e-3,
1480
- np.float32: 1.0e-6,
1481
- np.float64: 1.0e-8,
1482
- }.get(dtype, 0)
1483
-
1484
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1485
- vec2 = wp.types.vector(length=2, dtype=wptype)
1486
- vec3 = wp.types.vector(length=3, dtype=wptype)
1487
- vec4 = wp.types.vector(length=4, dtype=wptype)
1488
- vec5 = wp.types.vector(length=5, dtype=wptype)
1489
-
1490
- def check_div(
1491
- s: wp.array(dtype=wptype),
1492
- v2: wp.array(dtype=vec2),
1493
- v3: wp.array(dtype=vec3),
1494
- v4: wp.array(dtype=vec4),
1495
- v5: wp.array(dtype=vec5),
1496
- v20: wp.array(dtype=wptype),
1497
- v21: wp.array(dtype=wptype),
1498
- v30: wp.array(dtype=wptype),
1499
- v31: wp.array(dtype=wptype),
1500
- v32: wp.array(dtype=wptype),
1501
- v40: wp.array(dtype=wptype),
1502
- v41: wp.array(dtype=wptype),
1503
- v42: wp.array(dtype=wptype),
1504
- v43: wp.array(dtype=wptype),
1505
- v50: wp.array(dtype=wptype),
1506
- v51: wp.array(dtype=wptype),
1507
- v52: wp.array(dtype=wptype),
1508
- v53: wp.array(dtype=wptype),
1509
- v54: wp.array(dtype=wptype),
1510
- ):
1511
- v2result = v2[0] / s[0]
1512
- v3result = v3[0] / s[0]
1513
- v4result = v4[0] / s[0]
1514
- v5result = v5[0] / s[0]
1515
-
1516
- v20[0] = wptype(2) * v2result[0]
1517
- v21[0] = wptype(2) * v2result[1]
1518
-
1519
- v30[0] = wptype(2) * v3result[0]
1520
- v31[0] = wptype(2) * v3result[1]
1521
- v32[0] = wptype(2) * v3result[2]
1522
-
1523
- v40[0] = wptype(2) * v4result[0]
1524
- v41[0] = wptype(2) * v4result[1]
1525
- v42[0] = wptype(2) * v4result[2]
1526
- v43[0] = wptype(2) * v4result[3]
1527
-
1528
- v50[0] = wptype(2) * v5result[0]
1529
- v51[0] = wptype(2) * v5result[1]
1530
- v52[0] = wptype(2) * v5result[2]
1531
- v53[0] = wptype(2) * v5result[3]
1532
- v54[0] = wptype(2) * v5result[4]
1533
-
1534
- kernel = getkernel(check_div, suffix=dtype.__name__)
1535
-
1536
- if register_kernels:
1537
- return
1538
-
1539
- s = wp.array(randvals(rng, [1], dtype), requires_grad=True, device=device)
1540
- v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1541
- v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1542
- v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1543
- v5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1544
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1545
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1546
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1547
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1548
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1549
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1550
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1551
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1552
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1553
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1554
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1555
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1556
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1557
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1558
- tape = wp.Tape()
1559
- with tape:
1560
- wp.launch(
1561
- kernel,
1562
- dim=1,
1563
- inputs=[
1564
- s,
1565
- v2,
1566
- v3,
1567
- v4,
1568
- v5,
1569
- ],
1570
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
1571
- device=device,
1572
- )
1573
-
1574
- if dtype in np_int_types:
1575
- assert_np_equal(v20.numpy()[0], 2 * (v2.numpy()[0, 0] // (s.numpy()[0])), tol=tol)
1576
- assert_np_equal(v21.numpy()[0], 2 * (v2.numpy()[0, 1] // (s.numpy()[0])), tol=tol)
1577
-
1578
- assert_np_equal(v30.numpy()[0], 2 * (v3.numpy()[0, 0] // (s.numpy()[0])), tol=10 * tol)
1579
- assert_np_equal(v31.numpy()[0], 2 * (v3.numpy()[0, 1] // (s.numpy()[0])), tol=10 * tol)
1580
- assert_np_equal(v32.numpy()[0], 2 * (v3.numpy()[0, 2] // (s.numpy()[0])), tol=10 * tol)
1581
-
1582
- assert_np_equal(v40.numpy()[0], 2 * (v4.numpy()[0, 0] // (s.numpy()[0])), tol=10 * tol)
1583
- assert_np_equal(v41.numpy()[0], 2 * (v4.numpy()[0, 1] // (s.numpy()[0])), tol=10 * tol)
1584
- assert_np_equal(v42.numpy()[0], 2 * (v4.numpy()[0, 2] // (s.numpy()[0])), tol=10 * tol)
1585
- assert_np_equal(v43.numpy()[0], 2 * (v4.numpy()[0, 3] // (s.numpy()[0])), tol=10 * tol)
1586
-
1587
- assert_np_equal(v50.numpy()[0], 2 * (v5.numpy()[0, 0] // (s.numpy()[0])), tol=10 * tol)
1588
- assert_np_equal(v51.numpy()[0], 2 * (v5.numpy()[0, 1] // (s.numpy()[0])), tol=10 * tol)
1589
- assert_np_equal(v52.numpy()[0], 2 * (v5.numpy()[0, 2] // (s.numpy()[0])), tol=10 * tol)
1590
- assert_np_equal(v53.numpy()[0], 2 * (v5.numpy()[0, 3] // (s.numpy()[0])), tol=10 * tol)
1591
- assert_np_equal(v54.numpy()[0], 2 * (v5.numpy()[0, 4] // (s.numpy()[0])), tol=10 * tol)
1592
-
1593
- else:
1594
- assert_np_equal(v20.numpy()[0], 2 * v2.numpy()[0, 0] / (s.numpy()[0]), tol=tol)
1595
- assert_np_equal(v21.numpy()[0], 2 * v2.numpy()[0, 1] / (s.numpy()[0]), tol=tol)
1596
-
1597
- assert_np_equal(v30.numpy()[0], 2 * v3.numpy()[0, 0] / (s.numpy()[0]), tol=10 * tol)
1598
- assert_np_equal(v31.numpy()[0], 2 * v3.numpy()[0, 1] / (s.numpy()[0]), tol=10 * tol)
1599
- assert_np_equal(v32.numpy()[0], 2 * v3.numpy()[0, 2] / (s.numpy()[0]), tol=10 * tol)
1600
-
1601
- assert_np_equal(v40.numpy()[0], 2 * v4.numpy()[0, 0] / (s.numpy()[0]), tol=10 * tol)
1602
- assert_np_equal(v41.numpy()[0], 2 * v4.numpy()[0, 1] / (s.numpy()[0]), tol=10 * tol)
1603
- assert_np_equal(v42.numpy()[0], 2 * v4.numpy()[0, 2] / (s.numpy()[0]), tol=10 * tol)
1604
- assert_np_equal(v43.numpy()[0], 2 * v4.numpy()[0, 3] / (s.numpy()[0]), tol=10 * tol)
1605
-
1606
- assert_np_equal(v50.numpy()[0], 2 * v5.numpy()[0, 0] / (s.numpy()[0]), tol=10 * tol)
1607
- assert_np_equal(v51.numpy()[0], 2 * v5.numpy()[0, 1] / (s.numpy()[0]), tol=10 * tol)
1608
- assert_np_equal(v52.numpy()[0], 2 * v5.numpy()[0, 2] / (s.numpy()[0]), tol=10 * tol)
1609
- assert_np_equal(v53.numpy()[0], 2 * v5.numpy()[0, 3] / (s.numpy()[0]), tol=10 * tol)
1610
- assert_np_equal(v54.numpy()[0], 2 * v5.numpy()[0, 4] / (s.numpy()[0]), tol=10 * tol)
1611
-
1612
- incmps = np.concatenate([v.numpy()[0] for v in [v2, v3, v4, v5]])
1613
-
1614
- if dtype in np_float_types:
1615
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
1616
- tape.backward(loss=l)
1617
- sgrad = tape.gradients[s].numpy()[0]
1618
-
1619
- # d/ds v/s = -v/s^2
1620
- assert_np_equal(sgrad, -2 * incmps[i] / (s.numpy()[0] * s.numpy()[0]), tol=10 * tol)
1621
-
1622
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
1623
- expected_grads = np.zeros_like(allgrads)
1624
- expected_grads[i] = 2 / s.numpy()[0]
1625
-
1626
- # d/dv v/s = 1/s
1627
- assert_np_equal(allgrads, expected_grads, tol=tol)
1628
- tape.zero()
1629
-
1630
-
1631
- def test_cw_division(test, device, dtype, register_kernels=False):
1632
- rng = np.random.default_rng(123)
1633
-
1634
- tol = {
1635
- np.float16: 1.0e-2,
1636
- np.float32: 1.0e-6,
1637
- np.float64: 1.0e-8,
1638
- }.get(dtype, 0)
1639
-
1640
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1641
- vec2 = wp.types.vector(length=2, dtype=wptype)
1642
- vec3 = wp.types.vector(length=3, dtype=wptype)
1643
- vec4 = wp.types.vector(length=4, dtype=wptype)
1644
- vec5 = wp.types.vector(length=5, dtype=wptype)
1645
-
1646
- def check_cw_div(
1647
- s2: wp.array(dtype=vec2),
1648
- s3: wp.array(dtype=vec3),
1649
- s4: wp.array(dtype=vec4),
1650
- s5: wp.array(dtype=vec5),
1651
- v2: wp.array(dtype=vec2),
1652
- v3: wp.array(dtype=vec3),
1653
- v4: wp.array(dtype=vec4),
1654
- v5: wp.array(dtype=vec5),
1655
- v20: wp.array(dtype=wptype),
1656
- v21: wp.array(dtype=wptype),
1657
- v30: wp.array(dtype=wptype),
1658
- v31: wp.array(dtype=wptype),
1659
- v32: wp.array(dtype=wptype),
1660
- v40: wp.array(dtype=wptype),
1661
- v41: wp.array(dtype=wptype),
1662
- v42: wp.array(dtype=wptype),
1663
- v43: wp.array(dtype=wptype),
1664
- v50: wp.array(dtype=wptype),
1665
- v51: wp.array(dtype=wptype),
1666
- v52: wp.array(dtype=wptype),
1667
- v53: wp.array(dtype=wptype),
1668
- v54: wp.array(dtype=wptype),
1669
- ):
1670
- v2result = wp.cw_div(v2[0], s2[0])
1671
- v3result = wp.cw_div(v3[0], s3[0])
1672
- v4result = wp.cw_div(v4[0], s4[0])
1673
- v5result = wp.cw_div(v5[0], s5[0])
1674
-
1675
- v20[0] = wptype(2) * v2result[0]
1676
- v21[0] = wptype(2) * v2result[1]
1677
-
1678
- v30[0] = wptype(2) * v3result[0]
1679
- v31[0] = wptype(2) * v3result[1]
1680
- v32[0] = wptype(2) * v3result[2]
1681
-
1682
- v40[0] = wptype(2) * v4result[0]
1683
- v41[0] = wptype(2) * v4result[1]
1684
- v42[0] = wptype(2) * v4result[2]
1685
- v43[0] = wptype(2) * v4result[3]
1686
-
1687
- v50[0] = wptype(2) * v5result[0]
1688
- v51[0] = wptype(2) * v5result[1]
1689
- v52[0] = wptype(2) * v5result[2]
1690
- v53[0] = wptype(2) * v5result[3]
1691
- v54[0] = wptype(2) * v5result[4]
1692
-
1693
- kernel = getkernel(check_cw_div, suffix=dtype.__name__)
1694
-
1695
- if register_kernels:
1696
- return
1697
-
1698
- s2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1699
- s3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1700
- s4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1701
- s5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1702
- v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1703
- v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1704
- v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1705
- v5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1706
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1707
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1708
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1709
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1710
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1711
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1712
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1713
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1714
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1715
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1716
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1717
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1718
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1719
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1720
- tape = wp.Tape()
1721
- with tape:
1722
- wp.launch(
1723
- kernel,
1724
- dim=1,
1725
- inputs=[
1726
- s2,
1727
- s3,
1728
- s4,
1729
- s5,
1730
- v2,
1731
- v3,
1732
- v4,
1733
- v5,
1734
- ],
1735
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
1736
- device=device,
1737
- )
1738
-
1739
- if dtype in np_int_types:
1740
- assert_np_equal(v20.numpy()[0], 2 * (v2.numpy()[0, 0] // s2.numpy()[0, 0]), tol=tol)
1741
- assert_np_equal(v21.numpy()[0], 2 * (v2.numpy()[0, 1] // s2.numpy()[0, 1]), tol=tol)
1742
-
1743
- assert_np_equal(v30.numpy()[0], 2 * (v3.numpy()[0, 0] // s3.numpy()[0, 0]), tol=tol)
1744
- assert_np_equal(v31.numpy()[0], 2 * (v3.numpy()[0, 1] // s3.numpy()[0, 1]), tol=tol)
1745
- assert_np_equal(v32.numpy()[0], 2 * (v3.numpy()[0, 2] // s3.numpy()[0, 2]), tol=tol)
1746
-
1747
- assert_np_equal(v40.numpy()[0], 2 * (v4.numpy()[0, 0] // s4.numpy()[0, 0]), tol=tol)
1748
- assert_np_equal(v41.numpy()[0], 2 * (v4.numpy()[0, 1] // s4.numpy()[0, 1]), tol=tol)
1749
- assert_np_equal(v42.numpy()[0], 2 * (v4.numpy()[0, 2] // s4.numpy()[0, 2]), tol=tol)
1750
- assert_np_equal(v43.numpy()[0], 2 * (v4.numpy()[0, 3] // s4.numpy()[0, 3]), tol=tol)
1751
-
1752
- assert_np_equal(v50.numpy()[0], 2 * (v5.numpy()[0, 0] // s5.numpy()[0, 0]), tol=tol)
1753
- assert_np_equal(v51.numpy()[0], 2 * (v5.numpy()[0, 1] // s5.numpy()[0, 1]), tol=tol)
1754
- assert_np_equal(v52.numpy()[0], 2 * (v5.numpy()[0, 2] // s5.numpy()[0, 2]), tol=tol)
1755
- assert_np_equal(v53.numpy()[0], 2 * (v5.numpy()[0, 3] // s5.numpy()[0, 3]), tol=tol)
1756
- assert_np_equal(v54.numpy()[0], 2 * (v5.numpy()[0, 4] // s5.numpy()[0, 4]), tol=tol)
1757
- else:
1758
- assert_np_equal(v20.numpy()[0], 2 * v2.numpy()[0, 0] / s2.numpy()[0, 0], tol=tol)
1759
- assert_np_equal(v21.numpy()[0], 2 * v2.numpy()[0, 1] / s2.numpy()[0, 1], tol=tol)
1760
-
1761
- assert_np_equal(v30.numpy()[0], 2 * v3.numpy()[0, 0] / s3.numpy()[0, 0], tol=tol)
1762
- assert_np_equal(v31.numpy()[0], 2 * v3.numpy()[0, 1] / s3.numpy()[0, 1], tol=tol)
1763
- assert_np_equal(v32.numpy()[0], 2 * v3.numpy()[0, 2] / s3.numpy()[0, 2], tol=tol)
1764
-
1765
- assert_np_equal(v40.numpy()[0], 2 * v4.numpy()[0, 0] / s4.numpy()[0, 0], tol=tol)
1766
- assert_np_equal(v41.numpy()[0], 2 * v4.numpy()[0, 1] / s4.numpy()[0, 1], tol=tol)
1767
- assert_np_equal(v42.numpy()[0], 2 * v4.numpy()[0, 2] / s4.numpy()[0, 2], tol=tol)
1768
- assert_np_equal(v43.numpy()[0], 2 * v4.numpy()[0, 3] / s4.numpy()[0, 3], tol=tol)
1769
-
1770
- assert_np_equal(v50.numpy()[0], 2 * v5.numpy()[0, 0] / s5.numpy()[0, 0], tol=tol)
1771
- assert_np_equal(v51.numpy()[0], 2 * v5.numpy()[0, 1] / s5.numpy()[0, 1], tol=tol)
1772
- assert_np_equal(v52.numpy()[0], 2 * v5.numpy()[0, 2] / s5.numpy()[0, 2], tol=tol)
1773
- assert_np_equal(v53.numpy()[0], 2 * v5.numpy()[0, 3] / s5.numpy()[0, 3], tol=tol)
1774
- assert_np_equal(v54.numpy()[0], 2 * v5.numpy()[0, 4] / s5.numpy()[0, 4], tol=tol)
1775
-
1776
- if dtype in np_float_types:
1777
- incmps = np.concatenate([v.numpy()[0] for v in [v2, v3, v4, v5]])
1778
- scmps = np.concatenate([v.numpy()[0] for v in [s2, s3, s4, s5]])
1779
-
1780
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
1781
- tape.backward(loss=l)
1782
- sgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [s2, s3, s4, s5]])
1783
- expected_grads = np.zeros_like(sgrads)
1784
-
1785
- # d/ds v/s = -v/s^2
1786
- expected_grads[i] = -incmps[i] * 2 / (scmps[i] * scmps[i])
1787
- assert_np_equal(sgrads, expected_grads, tol=20 * tol)
1788
-
1789
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
1790
- expected_grads = np.zeros_like(allgrads)
195
+ def test_tpl_ops_with_anon(test, device):
196
+ vec3i = wp.vec(3, dtype=int)
1791
197
 
1792
- # d/dv v/s = 1/s
1793
- expected_grads[i] = 2 / scmps[i]
1794
- assert_np_equal(allgrads, expected_grads, tol=tol)
198
+ v = wp.vec3i(1, 2, 3)
199
+ v += vec3i(2, 3, 4)
200
+ v -= vec3i(3, 4, 5)
201
+ test.assertSequenceEqual(v, (0, 1, 2))
1795
202
 
1796
- tape.zero()
203
+ v = vec3i(1, 2, 3)
204
+ v += wp.vec3i(2, 3, 4)
205
+ v -= wp.vec3i(3, 4, 5)
206
+ test.assertSequenceEqual(v, (0, 1, 2))
1797
207
 
1798
208
 
1799
- def test_addition(test, device, dtype, register_kernels=False):
209
+ def test_negation(test, device, dtype, register_kernels=False):
1800
210
  rng = np.random.default_rng(123)
1801
211
 
1802
212
  tol = {
@@ -1811,15 +221,15 @@ def test_addition(test, device, dtype, register_kernels=False):
1811
221
  vec4 = wp.types.vector(length=4, dtype=wptype)
1812
222
  vec5 = wp.types.vector(length=5, dtype=wptype)
1813
223
 
1814
- def check_add(
1815
- s2: wp.array(dtype=vec2),
1816
- s3: wp.array(dtype=vec3),
1817
- s4: wp.array(dtype=vec4),
1818
- s5: wp.array(dtype=vec5),
224
+ def check_negation(
1819
225
  v2: wp.array(dtype=vec2),
1820
226
  v3: wp.array(dtype=vec3),
1821
227
  v4: wp.array(dtype=vec4),
1822
228
  v5: wp.array(dtype=vec5),
229
+ v2out: wp.array(dtype=vec2),
230
+ v3out: wp.array(dtype=vec3),
231
+ v4out: wp.array(dtype=vec4),
232
+ v5out: wp.array(dtype=vec5),
1823
233
  v20: wp.array(dtype=wptype),
1824
234
  v21: wp.array(dtype=wptype),
1825
235
  v30: wp.array(dtype=wptype),
@@ -1835,11 +245,17 @@ def test_addition(test, device, dtype, register_kernels=False):
1835
245
  v53: wp.array(dtype=wptype),
1836
246
  v54: wp.array(dtype=wptype),
1837
247
  ):
1838
- v2result = v2[0] + s2[0]
1839
- v3result = v3[0] + s3[0]
1840
- v4result = v4[0] + s4[0]
1841
- v5result = v5[0] + s5[0]
248
+ v2result = -v2[0]
249
+ v3result = -v3[0]
250
+ v4result = -v4[0]
251
+ v5result = -v5[0]
252
+
253
+ v2out[0] = v2result
254
+ v3out[0] = v3result
255
+ v4out[0] = v4result
256
+ v5out[0] = v5result
1842
257
 
258
+ # multiply these outputs by 2 so we've got something to backpropagate:
1843
259
  v20[0] = wptype(2) * v2result[0]
1844
260
  v21[0] = wptype(2) * v2result[1]
1845
261
 
@@ -1858,19 +274,21 @@ def test_addition(test, device, dtype, register_kernels=False):
1858
274
  v53[0] = wptype(2) * v5result[3]
1859
275
  v54[0] = wptype(2) * v5result[4]
1860
276
 
1861
- kernel = getkernel(check_add, suffix=dtype.__name__)
277
+ kernel = getkernel(check_negation, suffix=dtype.__name__)
1862
278
 
1863
279
  if register_kernels:
1864
280
  return
1865
281
 
1866
- s2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1867
- s3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1868
- s4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1869
- s5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1870
282
  v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1871
283
  v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1872
284
  v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1873
- v5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
285
+ v5_np = randvals(rng, (1, 5), dtype)
286
+ v5 = wp.array(v5_np, dtype=vec5, requires_grad=True, device=device)
287
+
288
+ v2out = wp.zeros(1, dtype=vec2, device=device)
289
+ v3out = wp.zeros(1, dtype=vec3, device=device)
290
+ v4out = wp.zeros(1, dtype=vec4, device=device)
291
+ v5out = wp.zeros(1, dtype=vec5, device=device)
1874
292
  v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1875
293
  v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1876
294
  v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
@@ -1885,57 +303,31 @@ def test_addition(test, device, dtype, register_kernels=False):
1885
303
  v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1886
304
  v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1887
305
  v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
306
+
1888
307
  tape = wp.Tape()
1889
308
  with tape:
1890
309
  wp.launch(
1891
310
  kernel,
1892
311
  dim=1,
1893
- inputs=[
1894
- s2,
1895
- s3,
1896
- s4,
1897
- s5,
1898
- v2,
1899
- v3,
1900
- v4,
1901
- v5,
1902
- ],
1903
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
312
+ inputs=[v2, v3, v4, v5],
313
+ outputs=[v2out, v3out, v4out, v5out, v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
1904
314
  device=device,
1905
315
  )
1906
316
 
1907
- assert_np_equal(v20.numpy()[0], 2 * (v2.numpy()[0, 0] + s2.numpy()[0, 0]), tol=tol)
1908
- assert_np_equal(v21.numpy()[0], 2 * (v2.numpy()[0, 1] + s2.numpy()[0, 1]), tol=tol)
1909
-
1910
- assert_np_equal(v30.numpy()[0], 2 * (v3.numpy()[0, 0] + s3.numpy()[0, 0]), tol=tol)
1911
- assert_np_equal(v31.numpy()[0], 2 * (v3.numpy()[0, 1] + s3.numpy()[0, 1]), tol=tol)
1912
- assert_np_equal(v32.numpy()[0], 2 * (v3.numpy()[0, 2] + s3.numpy()[0, 2]), tol=tol)
1913
-
1914
- assert_np_equal(v40.numpy()[0], 2 * (v4.numpy()[0, 0] + s4.numpy()[0, 0]), tol=tol)
1915
- assert_np_equal(v41.numpy()[0], 2 * (v4.numpy()[0, 1] + s4.numpy()[0, 1]), tol=tol)
1916
- assert_np_equal(v42.numpy()[0], 2 * (v4.numpy()[0, 2] + s4.numpy()[0, 2]), tol=tol)
1917
- assert_np_equal(v43.numpy()[0], 2 * (v4.numpy()[0, 3] + s4.numpy()[0, 3]), tol=tol)
1918
-
1919
- assert_np_equal(v50.numpy()[0], 2 * (v5.numpy()[0, 0] + s5.numpy()[0, 0]), tol=tol)
1920
- assert_np_equal(v51.numpy()[0], 2 * (v5.numpy()[0, 1] + s5.numpy()[0, 1]), tol=tol)
1921
- assert_np_equal(v52.numpy()[0], 2 * (v5.numpy()[0, 2] + s5.numpy()[0, 2]), tol=tol)
1922
- assert_np_equal(v53.numpy()[0], 2 * (v5.numpy()[0, 3] + s5.numpy()[0, 3]), tol=tol)
1923
- assert_np_equal(v54.numpy()[0], 2 * (v5.numpy()[0, 4] + s5.numpy()[0, 4]), tol=2 * tol)
1924
-
1925
317
  if dtype in np_float_types:
1926
318
  for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
1927
319
  tape.backward(loss=l)
1928
- sgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [s2, s3, s4, s5]])
1929
- expected_grads = np.zeros_like(sgrads)
1930
-
1931
- expected_grads[i] = 2
1932
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
1933
-
1934
320
  allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
321
+ expected_grads = np.zeros_like(allgrads)
322
+ expected_grads[i] = -2
1935
323
  assert_np_equal(allgrads, expected_grads, tol=tol)
1936
-
1937
324
  tape.zero()
1938
325
 
326
+ assert_np_equal(v2out.numpy()[0], -v2.numpy()[0], tol=tol)
327
+ assert_np_equal(v3out.numpy()[0], -v3.numpy()[0], tol=tol)
328
+ assert_np_equal(v4out.numpy()[0], -v4.numpy()[0], tol=tol)
329
+ assert_np_equal(v5out.numpy()[0], -v5.numpy()[0], tol=tol)
330
+
1939
331
 
1940
332
  def test_subtraction_unsigned(test, device, dtype, register_kernels=False):
1941
333
  wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
@@ -2131,127 +523,6 @@ def test_subtraction(test, device, dtype, register_kernels=False):
2131
523
  tape.zero()
2132
524
 
2133
525
 
2134
- def test_dotproduct(test, device, dtype, register_kernels=False):
2135
- rng = np.random.default_rng(123)
2136
-
2137
- tol = {
2138
- np.float16: 1.0e-2,
2139
- np.float32: 1.0e-6,
2140
- np.float64: 1.0e-8,
2141
- }.get(dtype, 0)
2142
-
2143
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
2144
- vec2 = wp.types.vector(length=2, dtype=wptype)
2145
- vec3 = wp.types.vector(length=3, dtype=wptype)
2146
- vec4 = wp.types.vector(length=4, dtype=wptype)
2147
- vec5 = wp.types.vector(length=5, dtype=wptype)
2148
-
2149
- def check_dot(
2150
- s2: wp.array(dtype=vec2),
2151
- s3: wp.array(dtype=vec3),
2152
- s4: wp.array(dtype=vec4),
2153
- s5: wp.array(dtype=vec5),
2154
- v2: wp.array(dtype=vec2),
2155
- v3: wp.array(dtype=vec3),
2156
- v4: wp.array(dtype=vec4),
2157
- v5: wp.array(dtype=vec5),
2158
- dot2: wp.array(dtype=wptype),
2159
- dot3: wp.array(dtype=wptype),
2160
- dot4: wp.array(dtype=wptype),
2161
- dot5: wp.array(dtype=wptype),
2162
- ):
2163
- dot2[0] = wptype(2) * wp.dot(v2[0], s2[0])
2164
- dot3[0] = wptype(2) * wp.dot(v3[0], s3[0])
2165
- dot4[0] = wptype(2) * wp.dot(v4[0], s4[0])
2166
- dot5[0] = wptype(2) * wp.dot(v5[0], s5[0])
2167
-
2168
- kernel = getkernel(check_dot, suffix=dtype.__name__)
2169
-
2170
- if register_kernels:
2171
- return
2172
-
2173
- s2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
2174
- s3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
2175
- s4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
2176
- s5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
2177
- v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
2178
- v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
2179
- v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
2180
- v5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
2181
- dot2 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2182
- dot3 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2183
- dot4 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2184
- dot5 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2185
- tape = wp.Tape()
2186
- with tape:
2187
- wp.launch(
2188
- kernel,
2189
- dim=1,
2190
- inputs=[
2191
- s2,
2192
- s3,
2193
- s4,
2194
- s5,
2195
- v2,
2196
- v3,
2197
- v4,
2198
- v5,
2199
- ],
2200
- outputs=[dot2, dot3, dot4, dot5],
2201
- device=device,
2202
- )
2203
-
2204
- assert_np_equal(dot2.numpy()[0], 2.0 * (v2.numpy() * s2.numpy()).sum(), tol=10 * tol)
2205
- assert_np_equal(dot3.numpy()[0], 2.0 * (v3.numpy() * s3.numpy()).sum(), tol=10 * tol)
2206
- assert_np_equal(dot4.numpy()[0], 2.0 * (v4.numpy() * s4.numpy()).sum(), tol=10 * tol)
2207
- assert_np_equal(dot5.numpy()[0], 2.0 * (v5.numpy() * s5.numpy()).sum(), tol=10 * tol)
2208
-
2209
- if dtype in np_float_types:
2210
- tape.backward(loss=dot2)
2211
- sgrads = tape.gradients[s2].numpy()[0]
2212
- expected_grads = 2.0 * v2.numpy()[0]
2213
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
2214
-
2215
- vgrads = tape.gradients[v2].numpy()[0]
2216
- expected_grads = 2.0 * s2.numpy()[0]
2217
- assert_np_equal(vgrads, expected_grads, tol=tol)
2218
-
2219
- tape.zero()
2220
-
2221
- tape.backward(loss=dot3)
2222
- sgrads = tape.gradients[s3].numpy()[0]
2223
- expected_grads = 2.0 * v3.numpy()[0]
2224
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
2225
-
2226
- vgrads = tape.gradients[v3].numpy()[0]
2227
- expected_grads = 2.0 * s3.numpy()[0]
2228
- assert_np_equal(vgrads, expected_grads, tol=tol)
2229
-
2230
- tape.zero()
2231
-
2232
- tape.backward(loss=dot4)
2233
- sgrads = tape.gradients[s4].numpy()[0]
2234
- expected_grads = 2.0 * v4.numpy()[0]
2235
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
2236
-
2237
- vgrads = tape.gradients[v4].numpy()[0]
2238
- expected_grads = 2.0 * s4.numpy()[0]
2239
- assert_np_equal(vgrads, expected_grads, tol=tol)
2240
-
2241
- tape.zero()
2242
-
2243
- tape.backward(loss=dot5)
2244
- sgrads = tape.gradients[s5].numpy()[0]
2245
- expected_grads = 2.0 * v5.numpy()[0]
2246
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
2247
-
2248
- vgrads = tape.gradients[v5].numpy()[0]
2249
- expected_grads = 2.0 * s5.numpy()[0]
2250
- assert_np_equal(vgrads, expected_grads, tol=10 * tol)
2251
-
2252
- tape.zero()
2253
-
2254
-
2255
526
  def test_length(test, device, dtype, register_kernels=False):
2256
527
  rng = np.random.default_rng(123)
2257
528
 
@@ -2713,139 +984,6 @@ def test_crossproduct(test, device, dtype, register_kernels=False):
2713
984
  tape.zero()
2714
985
 
2715
986
 
2716
- def test_minmax(test, device, dtype, register_kernels=False):
2717
- rng = np.random.default_rng(123)
2718
-
2719
- # \TODO: not quite sure why, but the numbers are off for 16 bit float
2720
- # on the cpu (but not cuda). This is probably just the sketchy float16
2721
- # arithmetic I implemented to get all this stuff working, so
2722
- # hopefully that can be fixed when we do that correctly.
2723
- tol = {
2724
- np.float16: 1.0e-2,
2725
- }.get(dtype, 0)
2726
-
2727
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
2728
- vec2 = wp.types.vector(length=2, dtype=wptype)
2729
- vec3 = wp.types.vector(length=3, dtype=wptype)
2730
- vec4 = wp.types.vector(length=4, dtype=wptype)
2731
- vec5 = wp.types.vector(length=5, dtype=wptype)
2732
-
2733
- # \TODO: Also not quite sure why: this kernel compiles incredibly
2734
- # slowly though...
2735
- def check_vec_min_max(
2736
- a: wp.array(dtype=wptype, ndim=2),
2737
- b: wp.array(dtype=wptype, ndim=2),
2738
- mins: wp.array(dtype=wptype, ndim=2),
2739
- maxs: wp.array(dtype=wptype, ndim=2),
2740
- ):
2741
- for i in range(10):
2742
- # multiplying by 2 so we've got something to backpropagate:
2743
- a2read = vec2(a[i, 0], a[i, 1])
2744
- b2read = vec2(b[i, 0], b[i, 1])
2745
- c2 = wptype(2) * wp.min(a2read, b2read)
2746
- d2 = wptype(2) * wp.max(a2read, b2read)
2747
-
2748
- a3read = vec3(a[i, 2], a[i, 3], a[i, 4])
2749
- b3read = vec3(b[i, 2], b[i, 3], b[i, 4])
2750
- c3 = wptype(2) * wp.min(a3read, b3read)
2751
- d3 = wptype(2) * wp.max(a3read, b3read)
2752
-
2753
- a4read = vec4(a[i, 5], a[i, 6], a[i, 7], a[i, 8])
2754
- b4read = vec4(b[i, 5], b[i, 6], b[i, 7], b[i, 8])
2755
- c4 = wptype(2) * wp.min(a4read, b4read)
2756
- d4 = wptype(2) * wp.max(a4read, b4read)
2757
-
2758
- a5read = vec5(a[i, 9], a[i, 10], a[i, 11], a[i, 12], a[i, 13])
2759
- b5read = vec5(b[i, 9], b[i, 10], b[i, 11], b[i, 12], b[i, 13])
2760
- c5 = wptype(2) * wp.min(a5read, b5read)
2761
- d5 = wptype(2) * wp.max(a5read, b5read)
2762
-
2763
- mins[i, 0] = c2[0]
2764
- mins[i, 1] = c2[1]
2765
-
2766
- mins[i, 2] = c3[0]
2767
- mins[i, 3] = c3[1]
2768
- mins[i, 4] = c3[2]
2769
-
2770
- mins[i, 5] = c4[0]
2771
- mins[i, 6] = c4[1]
2772
- mins[i, 7] = c4[2]
2773
- mins[i, 8] = c4[3]
2774
-
2775
- mins[i, 9] = c5[0]
2776
- mins[i, 10] = c5[1]
2777
- mins[i, 11] = c5[2]
2778
- mins[i, 12] = c5[3]
2779
- mins[i, 13] = c5[4]
2780
-
2781
- maxs[i, 0] = d2[0]
2782
- maxs[i, 1] = d2[1]
2783
-
2784
- maxs[i, 2] = d3[0]
2785
- maxs[i, 3] = d3[1]
2786
- maxs[i, 4] = d3[2]
2787
-
2788
- maxs[i, 5] = d4[0]
2789
- maxs[i, 6] = d4[1]
2790
- maxs[i, 7] = d4[2]
2791
- maxs[i, 8] = d4[3]
2792
-
2793
- maxs[i, 9] = d5[0]
2794
- maxs[i, 10] = d5[1]
2795
- maxs[i, 11] = d5[2]
2796
- maxs[i, 12] = d5[3]
2797
- maxs[i, 13] = d5[4]
2798
-
2799
- kernel = getkernel(check_vec_min_max, suffix=dtype.__name__)
2800
- output_select_kernel = get_select_kernel2(wptype)
2801
-
2802
- if register_kernels:
2803
- return
2804
-
2805
- a = wp.array(randvals(rng, (10, 14), dtype), dtype=wptype, requires_grad=True, device=device)
2806
- b = wp.array(randvals(rng, (10, 14), dtype), dtype=wptype, requires_grad=True, device=device)
2807
-
2808
- mins = wp.zeros((10, 14), dtype=wptype, requires_grad=True, device=device)
2809
- maxs = wp.zeros((10, 14), dtype=wptype, requires_grad=True, device=device)
2810
-
2811
- tape = wp.Tape()
2812
- with tape:
2813
- wp.launch(kernel, dim=1, inputs=[a, b], outputs=[mins, maxs], device=device)
2814
-
2815
- assert_np_equal(mins.numpy(), 2 * np.minimum(a.numpy(), b.numpy()), tol=tol)
2816
- assert_np_equal(maxs.numpy(), 2 * np.maximum(a.numpy(), b.numpy()), tol=tol)
2817
-
2818
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2819
- if dtype in np_float_types:
2820
- for i in range(10):
2821
- for j in range(14):
2822
- tape = wp.Tape()
2823
- with tape:
2824
- wp.launch(kernel, dim=1, inputs=[a, b], outputs=[mins, maxs], device=device)
2825
- wp.launch(output_select_kernel, dim=1, inputs=[mins, i, j], outputs=[out], device=device)
2826
-
2827
- tape.backward(loss=out)
2828
- expected = np.zeros_like(a.numpy())
2829
- expected[i, j] = 2 if (a.numpy()[i, j] < b.numpy()[i, j]) else 0
2830
- assert_np_equal(tape.gradients[a].numpy(), expected, tol=tol)
2831
- expected[i, j] = 2 if (b.numpy()[i, j] < a.numpy()[i, j]) else 0
2832
- assert_np_equal(tape.gradients[b].numpy(), expected, tol=tol)
2833
- tape.zero()
2834
-
2835
- tape = wp.Tape()
2836
- with tape:
2837
- wp.launch(kernel, dim=1, inputs=[a, b], outputs=[mins, maxs], device=device)
2838
- wp.launch(output_select_kernel, dim=1, inputs=[maxs, i, j], outputs=[out], device=device)
2839
-
2840
- tape.backward(loss=out)
2841
- expected = np.zeros_like(a.numpy())
2842
- expected[i, j] = 2 if (a.numpy()[i, j] > b.numpy()[i, j]) else 0
2843
- assert_np_equal(tape.gradients[a].numpy(), expected, tol=tol)
2844
- expected[i, j] = 2 if (b.numpy()[i, j] > a.numpy()[i, j]) else 0
2845
- assert_np_equal(tape.gradients[b].numpy(), expected, tol=tol)
2846
- tape.zero()
2847
-
2848
-
2849
987
  def test_casting_constructors(test, device, dtype, register_kernels=False):
2850
988
  np_type = np.dtype(dtype)
2851
989
  wp_type = wp.types.np_dtype_to_warp_type[np_type]
@@ -2949,85 +1087,6 @@ def test_casting_constructors(test, device, dtype, register_kernels=False):
2949
1087
  assert_np_equal(out, a_grad.numpy())
2950
1088
 
2951
1089
 
2952
- def test_equivalent_types(test, device, dtype, register_kernels=False):
2953
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
2954
-
2955
- # vector types
2956
- vec2 = wp.types.vector(length=2, dtype=wptype)
2957
- vec3 = wp.types.vector(length=3, dtype=wptype)
2958
- vec4 = wp.types.vector(length=4, dtype=wptype)
2959
- vec5 = wp.types.vector(length=5, dtype=wptype)
2960
-
2961
- # vector types equivalent to the above
2962
- vec2_equiv = wp.types.vector(length=2, dtype=wptype)
2963
- vec3_equiv = wp.types.vector(length=3, dtype=wptype)
2964
- vec4_equiv = wp.types.vector(length=4, dtype=wptype)
2965
- vec5_equiv = wp.types.vector(length=5, dtype=wptype)
2966
-
2967
- # declare kernel with original types
2968
- def check_equivalence(
2969
- v2: vec2,
2970
- v3: vec3,
2971
- v4: vec4,
2972
- v5: vec5,
2973
- ):
2974
- wp.expect_eq(v2, vec2(wptype(1), wptype(2)))
2975
- wp.expect_eq(v3, vec3(wptype(1), wptype(2), wptype(3)))
2976
- wp.expect_eq(v4, vec4(wptype(1), wptype(2), wptype(3), wptype(4)))
2977
- wp.expect_eq(v5, vec5(wptype(1), wptype(2), wptype(3), wptype(4), wptype(5)))
2978
-
2979
- wp.expect_eq(v2, vec2_equiv(wptype(1), wptype(2)))
2980
- wp.expect_eq(v3, vec3_equiv(wptype(1), wptype(2), wptype(3)))
2981
- wp.expect_eq(v4, vec4_equiv(wptype(1), wptype(2), wptype(3), wptype(4)))
2982
- wp.expect_eq(v5, vec5_equiv(wptype(1), wptype(2), wptype(3), wptype(4), wptype(5)))
2983
-
2984
- kernel = getkernel(check_equivalence, suffix=dtype.__name__)
2985
-
2986
- if register_kernels:
2987
- return
2988
-
2989
- # call kernel with equivalent types
2990
- v2 = vec2_equiv(1, 2)
2991
- v3 = vec3_equiv(1, 2, 3)
2992
- v4 = vec4_equiv(1, 2, 3, 4)
2993
- v5 = vec5_equiv(1, 2, 3, 4, 5)
2994
-
2995
- wp.launch(kernel, dim=1, inputs=[v2, v3, v4, v5], device=device)
2996
-
2997
-
2998
- def test_conversions(test, device, dtype, register_kernels=False):
2999
- def check_vectors_equal(
3000
- v0: wp.vec3,
3001
- v1: wp.vec3,
3002
- v2: wp.vec3,
3003
- v3: wp.vec3,
3004
- ):
3005
- wp.expect_eq(v1, v0)
3006
- wp.expect_eq(v2, v0)
3007
- wp.expect_eq(v3, v0)
3008
-
3009
- kernel = getkernel(check_vectors_equal, suffix=dtype.__name__)
3010
-
3011
- if register_kernels:
3012
- return
3013
-
3014
- v0 = wp.vec3(1, 2, 3)
3015
-
3016
- # test explicit conversions - constructing vectors from different containers
3017
- v1 = wp.vec3((1, 2, 3))
3018
- v2 = wp.vec3([1, 2, 3])
3019
- v3 = wp.vec3(np.array([1, 2, 3], dtype=dtype))
3020
-
3021
- wp.launch(kernel, dim=1, inputs=[v0, v1, v2, v3], device=device)
3022
-
3023
- # test implicit conversions - passing different containers as vectors to wp.launch()
3024
- v1 = (1, 2, 3)
3025
- v2 = [1, 2, 3]
3026
- v3 = np.array([1, 2, 3], dtype=dtype)
3027
-
3028
- wp.launch(kernel, dim=1, inputs=[v0, v1, v2, v3], device=device)
3029
-
3030
-
3031
1090
  @wp.kernel
3032
1091
  def test_vector_constructor_value_func():
3033
1092
  a = wp.vec2()
@@ -3097,172 +1156,113 @@ def test_constructors_constant_length():
3097
1156
  v[i] = float(i)
3098
1157
 
3099
1158
 
3100
- def register(parent):
3101
- devices = get_test_devices()
1159
+ devices = get_test_devices()
3102
1160
 
3103
- class TestVec(parent):
3104
- pass
3105
1161
 
3106
- add_kernel_test(TestVec, test_vector_constructor_value_func, dim=1, devices=devices)
3107
- add_kernel_test(TestVec, test_constructors_explicit_precision, dim=1, devices=devices)
3108
- add_kernel_test(TestVec, test_constructors_default_precision, dim=1, devices=devices)
3109
- add_kernel_test(TestVec, test_constructors_constant_length, dim=1, devices=devices)
3110
-
3111
- vec10 = wp.types.vector(length=10, dtype=float)
3112
- add_kernel_test(
3113
- TestVec,
3114
- test_vector_mutation,
3115
- dim=1,
3116
- inputs=[vec10(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0)],
3117
- devices=devices,
3118
- )
1162
+ class TestVec(unittest.TestCase):
1163
+ pass
3119
1164
 
3120
- for dtype in np_unsigned_int_types:
3121
- add_function_test_register_kernel(
3122
- TestVec,
3123
- f"test_subtraction_unsigned_{dtype.__name__}",
3124
- test_subtraction_unsigned,
3125
- devices=devices,
3126
- dtype=dtype,
3127
- )
3128
1165
 
3129
- for dtype in np_signed_int_types + np_float_types:
3130
- add_function_test_register_kernel(
3131
- TestVec, f"test_negation_{dtype.__name__}", test_negation, devices=devices, dtype=dtype
3132
- )
3133
- add_function_test_register_kernel(
3134
- TestVec, f"test_subtraction_{dtype.__name__}", test_subtraction, devices=devices, dtype=dtype
3135
- )
1166
+ add_kernel_test(TestVec, test_vector_constructor_value_func, dim=1, devices=devices)
1167
+ add_kernel_test(TestVec, test_constructors_explicit_precision, dim=1, devices=devices)
1168
+ add_kernel_test(TestVec, test_constructors_default_precision, dim=1, devices=devices)
1169
+ add_kernel_test(TestVec, test_constructors_constant_length, dim=1, devices=devices)
3136
1170
 
3137
- for dtype in np_float_types:
3138
- add_function_test_register_kernel(
3139
- TestVec, f"test_crossproduct_{dtype.__name__}", test_crossproduct, devices=devices, dtype=dtype
3140
- )
3141
- add_function_test_register_kernel(
3142
- TestVec, f"test_length_{dtype.__name__}", test_length, devices=devices, dtype=dtype
3143
- )
3144
- add_function_test_register_kernel(
3145
- TestVec, f"test_normalize_{dtype.__name__}", test_normalize, devices=devices, dtype=dtype
3146
- )
3147
- add_function_test_register_kernel(
3148
- TestVec,
3149
- f"test_casting_constructors_{dtype.__name__}",
3150
- test_casting_constructors,
3151
- devices=devices,
3152
- dtype=dtype,
3153
- )
1171
+ vec10 = wp.types.vector(length=10, dtype=float)
1172
+ add_kernel_test(
1173
+ TestVec,
1174
+ test_vector_mutation,
1175
+ dim=1,
1176
+ inputs=[vec10(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0)],
1177
+ devices=devices,
1178
+ )
3154
1179
 
3155
- add_function_test(
3156
- TestVec,
3157
- "test_anon_constructor_error_dtype_keyword_missing",
3158
- test_anon_constructor_error_dtype_keyword_missing,
3159
- devices=devices,
3160
- )
3161
- add_function_test(
1180
+ for dtype in np_unsigned_int_types:
1181
+ add_function_test_register_kernel(
3162
1182
  TestVec,
3163
- "test_anon_constructor_error_length_mismatch",
3164
- test_anon_constructor_error_length_mismatch,
1183
+ f"test_subtraction_unsigned_{dtype.__name__}",
1184
+ test_subtraction_unsigned,
3165
1185
  devices=devices,
1186
+ dtype=dtype,
3166
1187
  )
3167
- add_function_test(
3168
- TestVec,
3169
- "test_anon_constructor_error_numeric_arg_missing_1",
3170
- test_anon_constructor_error_numeric_arg_missing_1,
3171
- devices=devices,
1188
+
1189
+ for dtype in np_signed_int_types + np_float_types:
1190
+ add_function_test_register_kernel(
1191
+ TestVec, f"test_negation_{dtype.__name__}", test_negation, devices=devices, dtype=dtype
3172
1192
  )
3173
- add_function_test(
3174
- TestVec,
3175
- "test_anon_constructor_error_numeric_arg_missing_2",
3176
- test_anon_constructor_error_numeric_arg_missing_2,
3177
- devices=devices,
1193
+ add_function_test_register_kernel(
1194
+ TestVec, f"test_subtraction_{dtype.__name__}", test_subtraction, devices=devices, dtype=dtype
3178
1195
  )
3179
- add_function_test(
3180
- TestVec,
3181
- "test_anon_constructor_error_dtype_keyword_extraneous",
3182
- test_anon_constructor_error_dtype_keyword_extraneous,
3183
- devices=devices,
1196
+
1197
+ for dtype in np_float_types:
1198
+ add_function_test_register_kernel(
1199
+ TestVec, f"test_crossproduct_{dtype.__name__}", test_crossproduct, devices=devices, dtype=dtype
3184
1200
  )
3185
- add_function_test(
3186
- TestVec,
3187
- "test_anon_constructor_error_numeric_args_mismatch",
3188
- test_anon_constructor_error_numeric_args_mismatch,
3189
- devices=devices,
1201
+ add_function_test_register_kernel(
1202
+ TestVec, f"test_length_{dtype.__name__}", test_length, devices=devices, dtype=dtype
3190
1203
  )
3191
- add_function_test(
3192
- TestVec,
3193
- "test_tpl_constructor_error_incompatible_sizes",
3194
- test_tpl_constructor_error_incompatible_sizes,
3195
- devices=devices,
1204
+ add_function_test_register_kernel(
1205
+ TestVec, f"test_normalize_{dtype.__name__}", test_normalize, devices=devices, dtype=dtype
3196
1206
  )
3197
- add_function_test(
1207
+ add_function_test_register_kernel(
3198
1208
  TestVec,
3199
- "test_tpl_constructor_error_numeric_args_mismatch",
3200
- test_tpl_constructor_error_numeric_args_mismatch,
1209
+ f"test_casting_constructors_{dtype.__name__}",
1210
+ test_casting_constructors,
3201
1211
  devices=devices,
1212
+ dtype=dtype,
3202
1213
  )
3203
- add_function_test(TestVec, "test_tpl_ops_with_anon", test_tpl_ops_with_anon)
3204
-
3205
- for dtype in np_scalar_types:
3206
- add_function_test(TestVec, f"test_arrays_{dtype.__name__}", test_arrays, devices=devices, dtype=dtype)
3207
- add_function_test(TestVec, f"test_components_{dtype.__name__}", test_components, devices=None, dtype=dtype)
3208
- add_function_test_register_kernel(
3209
- TestVec, f"test_constructors_{dtype.__name__}", test_constructors, devices=devices, dtype=dtype
3210
- )
3211
- add_function_test_register_kernel(
3212
- TestVec, f"test_anon_type_instance_{dtype.__name__}", test_anon_type_instance, devices=devices, dtype=dtype
3213
- )
3214
- add_function_test_register_kernel(
3215
- TestVec, f"test_indexing_{dtype.__name__}", test_indexing, devices=devices, dtype=dtype
3216
- )
3217
- add_function_test_register_kernel(
3218
- TestVec, f"test_equality_{dtype.__name__}", test_equality, devices=devices, dtype=dtype
3219
- )
3220
- add_function_test_register_kernel(
3221
- TestVec,
3222
- f"test_scalar_multiplication_{dtype.__name__}",
3223
- test_scalar_multiplication,
3224
- devices=devices,
3225
- dtype=dtype,
3226
- )
3227
- add_function_test_register_kernel(
3228
- TestVec,
3229
- f"test_scalar_multiplication_rightmul_{dtype.__name__}",
3230
- test_scalar_multiplication_rightmul,
3231
- devices=devices,
3232
- dtype=dtype,
3233
- )
3234
- add_function_test_register_kernel(
3235
- TestVec, f"test_cw_multiplication_{dtype.__name__}", test_cw_multiplication, devices=devices, dtype=dtype
3236
- )
3237
- add_function_test_register_kernel(
3238
- TestVec, f"test_scalar_division_{dtype.__name__}", test_scalar_division, devices=devices, dtype=dtype
3239
- )
3240
- add_function_test_register_kernel(
3241
- TestVec, f"test_cw_division_{dtype.__name__}", test_cw_division, devices=devices, dtype=dtype
3242
- )
3243
- add_function_test_register_kernel(
3244
- TestVec, f"test_addition_{dtype.__name__}", test_addition, devices=devices, dtype=dtype
3245
- )
3246
- add_function_test_register_kernel(
3247
- TestVec, f"test_dotproduct_{dtype.__name__}", test_dotproduct, devices=devices, dtype=dtype
3248
- )
3249
- add_function_test_register_kernel(
3250
- TestVec, f"test_equivalent_types_{dtype.__name__}", test_equivalent_types, devices=devices, dtype=dtype
3251
- )
3252
- add_function_test_register_kernel(
3253
- TestVec, f"test_conversions_{dtype.__name__}", test_conversions, devices=devices, dtype=dtype
3254
- )
3255
- add_function_test_register_kernel(
3256
- TestVec, f"test_constants_{dtype.__name__}", test_constants, devices=devices, dtype=dtype
3257
- )
3258
-
3259
- # the kernels in this test compile incredibly slowly...
3260
- # add_function_test_register_kernel(TestVec, f"test_minmax_{dtype.__name__}", test_minmax, devices=devices, dtype=dtype)
3261
1214
 
3262
- return TestVec
1215
+ add_function_test(
1216
+ TestVec,
1217
+ "test_anon_constructor_error_dtype_keyword_missing",
1218
+ test_anon_constructor_error_dtype_keyword_missing,
1219
+ devices=devices,
1220
+ )
1221
+ add_function_test(
1222
+ TestVec,
1223
+ "test_anon_constructor_error_length_mismatch",
1224
+ test_anon_constructor_error_length_mismatch,
1225
+ devices=devices,
1226
+ )
1227
+ add_function_test(
1228
+ TestVec,
1229
+ "test_anon_constructor_error_numeric_arg_missing_1",
1230
+ test_anon_constructor_error_numeric_arg_missing_1,
1231
+ devices=devices,
1232
+ )
1233
+ add_function_test(
1234
+ TestVec,
1235
+ "test_anon_constructor_error_numeric_arg_missing_2",
1236
+ test_anon_constructor_error_numeric_arg_missing_2,
1237
+ devices=devices,
1238
+ )
1239
+ add_function_test(
1240
+ TestVec,
1241
+ "test_anon_constructor_error_dtype_keyword_extraneous",
1242
+ test_anon_constructor_error_dtype_keyword_extraneous,
1243
+ devices=devices,
1244
+ )
1245
+ add_function_test(
1246
+ TestVec,
1247
+ "test_anon_constructor_error_numeric_args_mismatch",
1248
+ test_anon_constructor_error_numeric_args_mismatch,
1249
+ devices=devices,
1250
+ )
1251
+ add_function_test(
1252
+ TestVec,
1253
+ "test_tpl_constructor_error_incompatible_sizes",
1254
+ test_tpl_constructor_error_incompatible_sizes,
1255
+ devices=devices,
1256
+ )
1257
+ add_function_test(
1258
+ TestVec,
1259
+ "test_tpl_constructor_error_numeric_args_mismatch",
1260
+ test_tpl_constructor_error_numeric_args_mismatch,
1261
+ devices=devices,
1262
+ )
1263
+ add_function_test(TestVec, "test_tpl_ops_with_anon", test_tpl_ops_with_anon)
3263
1264
 
3264
1265
 
3265
1266
  if __name__ == "__main__":
3266
1267
  wp.build.clear_kernel_cache()
3267
- _ = register(unittest.TestCase)
3268
1268
  unittest.main(verbosity=2, failfast=True)