warp-lang 1.0.0b2__py3-none-win_amd64.whl → 1.0.0b6__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (271) hide show
  1. docs/conf.py +17 -5
  2. examples/env/env_ant.py +1 -1
  3. examples/env/env_cartpole.py +1 -1
  4. examples/env/env_humanoid.py +1 -1
  5. examples/env/env_usd.py +4 -1
  6. examples/env/environment.py +8 -9
  7. examples/example_dem.py +34 -33
  8. examples/example_diffray.py +364 -337
  9. examples/example_fluid.py +32 -23
  10. examples/example_jacobian_ik.py +97 -93
  11. examples/example_marching_cubes.py +6 -16
  12. examples/example_mesh.py +6 -16
  13. examples/example_mesh_intersect.py +16 -14
  14. examples/example_nvdb.py +14 -16
  15. examples/example_raycast.py +14 -13
  16. examples/example_raymarch.py +16 -23
  17. examples/example_render_opengl.py +19 -10
  18. examples/example_sim_cartpole.py +82 -78
  19. examples/example_sim_cloth.py +45 -48
  20. examples/example_sim_fk_grad.py +51 -44
  21. examples/example_sim_fk_grad_torch.py +47 -40
  22. examples/example_sim_grad_bounce.py +108 -133
  23. examples/example_sim_grad_cloth.py +99 -113
  24. examples/example_sim_granular.py +5 -6
  25. examples/{example_sim_sdf_shape.py → example_sim_granular_collision_sdf.py} +37 -26
  26. examples/example_sim_neo_hookean.py +51 -55
  27. examples/example_sim_particle_chain.py +4 -4
  28. examples/example_sim_quadruped.py +126 -81
  29. examples/example_sim_rigid_chain.py +54 -61
  30. examples/example_sim_rigid_contact.py +66 -70
  31. examples/example_sim_rigid_fem.py +3 -3
  32. examples/example_sim_rigid_force.py +1 -1
  33. examples/example_sim_rigid_gyroscopic.py +3 -4
  34. examples/example_sim_rigid_kinematics.py +28 -39
  35. examples/example_sim_trajopt.py +112 -110
  36. examples/example_sph.py +9 -8
  37. examples/example_wave.py +7 -7
  38. examples/fem/bsr_utils.py +30 -17
  39. examples/fem/example_apic_fluid.py +85 -69
  40. examples/fem/example_convection_diffusion.py +97 -93
  41. examples/fem/example_convection_diffusion_dg.py +142 -149
  42. examples/fem/example_convection_diffusion_dg0.py +141 -136
  43. examples/fem/example_deformed_geometry.py +146 -0
  44. examples/fem/example_diffusion.py +115 -84
  45. examples/fem/example_diffusion_3d.py +116 -86
  46. examples/fem/example_diffusion_mgpu.py +102 -79
  47. examples/fem/example_mixed_elasticity.py +139 -100
  48. examples/fem/example_navier_stokes.py +175 -162
  49. examples/fem/example_stokes.py +143 -111
  50. examples/fem/example_stokes_transfer.py +186 -157
  51. examples/fem/mesh_utils.py +59 -97
  52. examples/fem/plot_utils.py +138 -17
  53. tools/ci/publishing/build_nodes_info.py +54 -0
  54. warp/__init__.py +4 -3
  55. warp/__init__.pyi +1 -0
  56. warp/bin/warp-clang.dll +0 -0
  57. warp/bin/warp.dll +0 -0
  58. warp/build.py +5 -3
  59. warp/build_dll.py +29 -9
  60. warp/builtins.py +836 -492
  61. warp/codegen.py +864 -553
  62. warp/config.py +3 -1
  63. warp/context.py +389 -172
  64. warp/fem/__init__.py +24 -6
  65. warp/fem/cache.py +318 -25
  66. warp/fem/dirichlet.py +7 -3
  67. warp/fem/domain.py +14 -0
  68. warp/fem/field/__init__.py +30 -38
  69. warp/fem/field/field.py +149 -0
  70. warp/fem/field/nodal_field.py +244 -138
  71. warp/fem/field/restriction.py +8 -6
  72. warp/fem/field/test.py +127 -59
  73. warp/fem/field/trial.py +117 -60
  74. warp/fem/geometry/__init__.py +5 -1
  75. warp/fem/geometry/deformed_geometry.py +271 -0
  76. warp/fem/geometry/element.py +24 -1
  77. warp/fem/geometry/geometry.py +86 -14
  78. warp/fem/geometry/grid_2d.py +112 -54
  79. warp/fem/geometry/grid_3d.py +134 -65
  80. warp/fem/geometry/hexmesh.py +953 -0
  81. warp/fem/geometry/partition.py +85 -33
  82. warp/fem/geometry/quadmesh_2d.py +532 -0
  83. warp/fem/geometry/tetmesh.py +451 -115
  84. warp/fem/geometry/trimesh_2d.py +197 -92
  85. warp/fem/integrate.py +534 -268
  86. warp/fem/operator.py +58 -31
  87. warp/fem/polynomial.py +11 -0
  88. warp/fem/quadrature/__init__.py +1 -1
  89. warp/fem/quadrature/pic_quadrature.py +150 -58
  90. warp/fem/quadrature/quadrature.py +209 -57
  91. warp/fem/space/__init__.py +230 -53
  92. warp/fem/space/basis_space.py +489 -0
  93. warp/fem/space/collocated_function_space.py +105 -0
  94. warp/fem/space/dof_mapper.py +49 -2
  95. warp/fem/space/function_space.py +90 -39
  96. warp/fem/space/grid_2d_function_space.py +149 -496
  97. warp/fem/space/grid_3d_function_space.py +173 -538
  98. warp/fem/space/hexmesh_function_space.py +352 -0
  99. warp/fem/space/partition.py +129 -76
  100. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  101. warp/fem/space/restriction.py +46 -34
  102. warp/fem/space/shape/__init__.py +15 -0
  103. warp/fem/space/shape/cube_shape_function.py +738 -0
  104. warp/fem/space/shape/shape_function.py +103 -0
  105. warp/fem/space/shape/square_shape_function.py +611 -0
  106. warp/fem/space/shape/tet_shape_function.py +567 -0
  107. warp/fem/space/shape/triangle_shape_function.py +429 -0
  108. warp/fem/space/tetmesh_function_space.py +132 -1039
  109. warp/fem/space/topology.py +295 -0
  110. warp/fem/space/trimesh_2d_function_space.py +104 -742
  111. warp/fem/types.py +13 -11
  112. warp/fem/utils.py +335 -60
  113. warp/native/array.h +120 -34
  114. warp/native/builtin.h +101 -72
  115. warp/native/bvh.cpp +73 -325
  116. warp/native/bvh.cu +406 -23
  117. warp/native/bvh.h +22 -40
  118. warp/native/clang/clang.cpp +1 -0
  119. warp/native/crt.h +2 -0
  120. warp/native/cuda_util.cpp +8 -3
  121. warp/native/cuda_util.h +1 -0
  122. warp/native/exports.h +1522 -1243
  123. warp/native/intersect.h +19 -4
  124. warp/native/intersect_adj.h +8 -8
  125. warp/native/mat.h +76 -17
  126. warp/native/mesh.cpp +33 -108
  127. warp/native/mesh.cu +114 -18
  128. warp/native/mesh.h +395 -40
  129. warp/native/noise.h +272 -329
  130. warp/native/quat.h +51 -8
  131. warp/native/rand.h +44 -34
  132. warp/native/reduce.cpp +1 -1
  133. warp/native/sparse.cpp +4 -4
  134. warp/native/sparse.cu +163 -155
  135. warp/native/spatial.h +2 -2
  136. warp/native/temp_buffer.h +18 -14
  137. warp/native/vec.h +103 -21
  138. warp/native/warp.cpp +2 -1
  139. warp/native/warp.cu +28 -3
  140. warp/native/warp.h +4 -3
  141. warp/render/render_opengl.py +261 -109
  142. warp/sim/__init__.py +1 -2
  143. warp/sim/articulation.py +385 -185
  144. warp/sim/import_mjcf.py +59 -48
  145. warp/sim/import_urdf.py +15 -15
  146. warp/sim/import_usd.py +174 -102
  147. warp/sim/inertia.py +17 -18
  148. warp/sim/integrator_xpbd.py +4 -3
  149. warp/sim/model.py +330 -250
  150. warp/sim/render.py +1 -1
  151. warp/sparse.py +625 -152
  152. warp/stubs.py +341 -309
  153. warp/tape.py +9 -6
  154. warp/tests/__main__.py +3 -6
  155. warp/tests/assets/curlnoise_golden.npy +0 -0
  156. warp/tests/assets/pnoise_golden.npy +0 -0
  157. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  158. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  159. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  160. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  161. warp/tests/aux_test_unresolved_func.py +14 -0
  162. warp/tests/aux_test_unresolved_symbol.py +14 -0
  163. warp/tests/disabled_kinematics.py +239 -0
  164. warp/tests/run_coverage_serial.py +31 -0
  165. warp/tests/test_adam.py +103 -106
  166. warp/tests/test_arithmetic.py +94 -74
  167. warp/tests/test_array.py +82 -101
  168. warp/tests/test_array_reduce.py +57 -23
  169. warp/tests/test_atomic.py +64 -28
  170. warp/tests/test_bool.py +22 -12
  171. warp/tests/test_builtins_resolution.py +1292 -0
  172. warp/tests/test_bvh.py +18 -18
  173. warp/tests/test_closest_point_edge_edge.py +54 -57
  174. warp/tests/test_codegen.py +165 -134
  175. warp/tests/test_compile_consts.py +28 -20
  176. warp/tests/test_conditional.py +108 -24
  177. warp/tests/test_copy.py +10 -12
  178. warp/tests/test_ctypes.py +112 -88
  179. warp/tests/test_dense.py +21 -14
  180. warp/tests/test_devices.py +98 -0
  181. warp/tests/test_dlpack.py +75 -75
  182. warp/tests/test_examples.py +237 -0
  183. warp/tests/test_fabricarray.py +22 -24
  184. warp/tests/test_fast_math.py +15 -11
  185. warp/tests/test_fem.py +1034 -124
  186. warp/tests/test_fp16.py +23 -16
  187. warp/tests/test_func.py +187 -86
  188. warp/tests/test_generics.py +194 -49
  189. warp/tests/test_grad.py +123 -181
  190. warp/tests/test_grad_customs.py +176 -0
  191. warp/tests/test_hash_grid.py +35 -34
  192. warp/tests/test_import.py +10 -23
  193. warp/tests/test_indexedarray.py +24 -25
  194. warp/tests/test_intersect.py +18 -9
  195. warp/tests/test_large.py +141 -0
  196. warp/tests/test_launch.py +14 -41
  197. warp/tests/test_lerp.py +64 -65
  198. warp/tests/test_lvalue.py +493 -0
  199. warp/tests/test_marching_cubes.py +12 -13
  200. warp/tests/test_mat.py +517 -2898
  201. warp/tests/test_mat_lite.py +115 -0
  202. warp/tests/test_mat_scalar_ops.py +2889 -0
  203. warp/tests/test_math.py +103 -9
  204. warp/tests/test_matmul.py +304 -69
  205. warp/tests/test_matmul_lite.py +410 -0
  206. warp/tests/test_mesh.py +60 -22
  207. warp/tests/test_mesh_query_aabb.py +21 -25
  208. warp/tests/test_mesh_query_point.py +111 -22
  209. warp/tests/test_mesh_query_ray.py +12 -24
  210. warp/tests/test_mlp.py +30 -22
  211. warp/tests/test_model.py +92 -89
  212. warp/tests/test_modules_lite.py +39 -0
  213. warp/tests/test_multigpu.py +88 -114
  214. warp/tests/test_noise.py +12 -11
  215. warp/tests/test_operators.py +16 -20
  216. warp/tests/test_options.py +11 -11
  217. warp/tests/test_pinned.py +17 -18
  218. warp/tests/test_print.py +32 -11
  219. warp/tests/test_quat.py +275 -129
  220. warp/tests/test_rand.py +18 -16
  221. warp/tests/test_reload.py +38 -34
  222. warp/tests/test_rounding.py +50 -43
  223. warp/tests/test_runlength_encode.py +168 -20
  224. warp/tests/test_smoothstep.py +9 -11
  225. warp/tests/test_snippet.py +143 -0
  226. warp/tests/test_sparse.py +261 -63
  227. warp/tests/test_spatial.py +276 -243
  228. warp/tests/test_streams.py +110 -85
  229. warp/tests/test_struct.py +268 -63
  230. warp/tests/test_tape.py +39 -21
  231. warp/tests/test_torch.py +90 -86
  232. warp/tests/test_transient_module.py +10 -12
  233. warp/tests/test_types.py +363 -0
  234. warp/tests/test_utils.py +451 -0
  235. warp/tests/test_vec.py +354 -2050
  236. warp/tests/test_vec_lite.py +73 -0
  237. warp/tests/test_vec_scalar_ops.py +2099 -0
  238. warp/tests/test_volume.py +418 -376
  239. warp/tests/test_volume_write.py +124 -134
  240. warp/tests/unittest_serial.py +35 -0
  241. warp/tests/unittest_suites.py +291 -0
  242. warp/tests/unittest_utils.py +342 -0
  243. warp/tests/{test_misc.py → unused_test_misc.py} +13 -5
  244. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  245. warp/thirdparty/appdirs.py +36 -45
  246. warp/thirdparty/unittest_parallel.py +589 -0
  247. warp/types.py +622 -211
  248. warp/utils.py +54 -393
  249. warp_lang-1.0.0b6.dist-info/METADATA +238 -0
  250. warp_lang-1.0.0b6.dist-info/RECORD +409 -0
  251. {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/WHEEL +1 -1
  252. examples/example_cache_management.py +0 -40
  253. examples/example_multigpu.py +0 -54
  254. examples/example_struct.py +0 -65
  255. examples/fem/example_stokes_transfer_3d.py +0 -210
  256. warp/bin/warp-clang.so +0 -0
  257. warp/bin/warp.so +0 -0
  258. warp/fem/field/discrete_field.py +0 -80
  259. warp/fem/space/nodal_function_space.py +0 -233
  260. warp/tests/test_all.py +0 -223
  261. warp/tests/test_array_scan.py +0 -60
  262. warp/tests/test_base.py +0 -208
  263. warp/tests/test_unresolved_func.py +0 -7
  264. warp/tests/test_unresolved_symbol.py +0 -7
  265. warp_lang-1.0.0b2.dist-info/METADATA +0 -26
  266. warp_lang-1.0.0b2.dist-info/RECORD +0 -380
  267. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  268. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  269. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  270. {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/LICENSE.md +0 -0
  271. {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/top_level.txt +0 -0
warp/tests/test_vec.py CHANGED
@@ -5,9 +5,12 @@
5
5
  # distribution of this software and related documentation without an express
6
6
  # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
7
 
8
+ import unittest
9
+
8
10
  import numpy as np
11
+
9
12
  import warp as wp
10
- from warp.tests.test_base import *
13
+ from warp.tests.unittest_utils import *
11
14
 
12
15
  wp.init()
13
16
 
@@ -27,1635 +30,184 @@ np_unsigned_int_types = [
27
30
  np.ubyte,
28
31
  ]
29
32
 
30
- np_int_types = np_signed_int_types + np_unsigned_int_types
31
-
32
33
  np_float_types = [np.float16, np.float32, np.float64]
33
34
 
34
- np_scalar_types = np_int_types + np_float_types
35
-
36
35
 
37
- def randvals(shape, dtype):
36
+ def randvals(rng, shape, dtype):
38
37
  if dtype in np_float_types:
39
- return np.random.randn(*shape).astype(dtype)
38
+ return rng.standard_normal(size=shape).astype(dtype)
40
39
  elif dtype in [np.int8, np.uint8, np.byte, np.ubyte]:
41
- return np.random.randint(1, 3, size=shape, dtype=dtype)
42
- return np.random.randint(1, 5, size=shape, dtype=dtype)
40
+ return rng.integers(1, high=3, size=shape, dtype=dtype)
41
+ return rng.integers(1, high=5, size=shape, dtype=dtype)
43
42
 
44
43
 
45
44
  kernel_cache = dict()
46
45
 
47
46
 
48
47
  def getkernel(func, suffix=""):
49
- module = wp.get_module(func.__module__)
50
48
  key = func.__name__ + "_" + suffix
51
49
  if key not in kernel_cache:
52
- kernel_cache[key] = wp.Kernel(func=func, key=key, module=module)
50
+ kernel_cache[key] = wp.Kernel(func=func, key=key)
53
51
  return kernel_cache[key]
54
52
 
55
53
 
56
- def get_select_kernel(dtype):
57
- def output_select_kernel_fn(
58
- input: wp.array(dtype=dtype),
59
- index: int,
60
- out: wp.array(dtype=dtype),
61
- ):
62
- out[0] = input[index]
63
-
64
- return getkernel(output_select_kernel_fn, suffix=dtype.__name__)
65
-
66
-
67
- def get_select_kernel2(dtype):
68
- def output_select_kernel2_fn(
69
- input: wp.array(dtype=dtype, ndim=2),
70
- index0: int,
71
- index1: int,
72
- out: wp.array(dtype=dtype),
73
- ):
74
- out[0] = input[index0, index1]
75
-
76
- return getkernel(output_select_kernel2_fn, suffix=dtype.__name__)
77
-
78
-
79
- def test_arrays(test, device, dtype):
80
- np.random.seed(123)
81
-
82
- tol = {
83
- np.float16: 1.0e-3,
84
- np.float32: 1.0e-6,
85
- np.float64: 1.0e-8,
86
- }.get(dtype, 0)
87
-
88
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
89
- vec2 = wp.types.vector(length=2, dtype=wptype)
90
- vec3 = wp.types.vector(length=3, dtype=wptype)
91
- vec4 = wp.types.vector(length=4, dtype=wptype)
92
- vec5 = wp.types.vector(length=5, dtype=wptype)
93
-
94
- v2_np = randvals((10, 2), dtype)
95
- v3_np = randvals((10, 3), dtype)
96
- v4_np = randvals((10, 4), dtype)
97
- v5_np = randvals((10, 5), dtype)
98
-
99
- v2 = wp.array(v2_np, dtype=vec2, requires_grad=True, device=device)
100
- v3 = wp.array(v3_np, dtype=vec3, requires_grad=True, device=device)
101
- v4 = wp.array(v4_np, dtype=vec4, requires_grad=True, device=device)
102
- v5 = wp.array(v5_np, dtype=vec5, requires_grad=True, device=device)
103
-
104
- assert_np_equal(v2.numpy(), v2_np, tol=1.0e-6)
105
- assert_np_equal(v3.numpy(), v3_np, tol=1.0e-6)
106
- assert_np_equal(v4.numpy(), v4_np, tol=1.0e-6)
107
- assert_np_equal(v5.numpy(), v5_np, tol=1.0e-6)
108
-
109
- vec2 = wp.types.vector(length=2, dtype=wptype)
110
- vec3 = wp.types.vector(length=3, dtype=wptype)
111
- vec4 = wp.types.vector(length=4, dtype=wptype)
112
-
113
- v2 = wp.array(v2_np, dtype=vec2, requires_grad=True, device=device)
114
- v3 = wp.array(v3_np, dtype=vec3, requires_grad=True, device=device)
115
- v4 = wp.array(v4_np, dtype=vec4, requires_grad=True, device=device)
116
-
117
- assert_np_equal(v2.numpy(), v2_np, tol=1.0e-6)
118
- assert_np_equal(v3.numpy(), v3_np, tol=1.0e-6)
119
- assert_np_equal(v4.numpy(), v4_np, tol=1.0e-6)
120
-
121
-
122
- def test_components(test, device, dtype):
123
- # test accessing vector components from Python - this is especially important
124
- # for float16, which requires special handling internally
125
-
126
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
127
- vec3 = wp.types.vector(length=3, dtype=wptype)
128
-
129
- v = vec3(1, 2, 3)
130
-
131
- # test __getitem__ for individual components
132
- test.assertEqual(v[0], 1)
133
- test.assertEqual(v[1], 2)
134
- test.assertEqual(v[2], 3)
135
-
136
- # test __getitem__ for slices
137
- s = v[:]
138
- test.assertEqual(s[0], 1)
139
- test.assertEqual(s[1], 2)
140
- test.assertEqual(s[2], 3)
141
-
142
- s = v[1:]
143
- test.assertEqual(s[0], 2)
144
- test.assertEqual(s[1], 3)
145
-
146
- s = v[:2]
147
- test.assertEqual(s[0], 1)
148
- test.assertEqual(s[1], 2)
149
-
150
- s = v[::2]
151
- test.assertEqual(s[0], 1)
152
- test.assertEqual(s[1], 3)
153
-
154
- # test __setitem__ for individual components
155
- v[0] = 4
156
- v[1] = 5
157
- v[2] = 6
158
- test.assertEqual(v[0], 4)
159
- test.assertEqual(v[1], 5)
160
- test.assertEqual(v[2], 6)
161
-
162
- # test __setitem__ for slices
163
- v[:] = [7, 8, 9]
164
- test.assertEqual(v[0], 7)
165
- test.assertEqual(v[1], 8)
166
- test.assertEqual(v[2], 9)
167
-
168
- v[1:] = [10, 11]
169
- test.assertEqual(v[0], 7)
170
- test.assertEqual(v[1], 10)
171
- test.assertEqual(v[2], 11)
172
-
173
- v[:2] = [12, 13]
174
- test.assertEqual(v[0], 12)
175
- test.assertEqual(v[1], 13)
176
- test.assertEqual(v[2], 11)
54
+ def test_anon_constructor_error_dtype_keyword_missing(test, device):
55
+ @wp.kernel
56
+ def kernel():
57
+ wp.vector(length=123)
177
58
 
178
- v[::2] = [14, 15]
179
- test.assertEqual(v[0], 14)
180
- test.assertEqual(v[1], 13)
181
- test.assertEqual(v[2], 15)
182
-
183
-
184
- def test_anon_type_instance(test, device, dtype, register_kernels=False):
185
- np.random.seed(123)
186
-
187
- tol = {
188
- np.float16: 5.0e-3,
189
- np.float32: 1.0e-6,
190
- np.float64: 1.0e-8,
191
- }.get(dtype, 0)
192
-
193
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
194
-
195
- def check_scalar_init(
196
- input: wp.array(dtype=wptype),
197
- output: wp.array(dtype=wptype),
198
- ):
199
- v2result = wp.vector(input[0], length=2)
200
- v3result = wp.vector(input[1], length=3)
201
- v4result = wp.vector(input[2], length=4)
202
- v5result = wp.vector(input[3], length=5)
203
-
204
- idx = 0
205
- for i in range(2):
206
- output[idx] = wptype(2) * v2result[i]
207
- idx = idx + 1
208
- for i in range(3):
209
- output[idx] = wptype(2) * v3result[i]
210
- idx = idx + 1
211
- for i in range(4):
212
- output[idx] = wptype(2) * v4result[i]
213
- idx = idx + 1
214
- for i in range(5):
215
- output[idx] = wptype(2) * v5result[i]
216
- idx = idx + 1
217
-
218
- def check_component_init(
219
- input: wp.array(dtype=wptype),
220
- output: wp.array(dtype=wptype),
59
+ with test.assertRaisesRegex(
60
+ RuntimeError,
61
+ r"vec\(\) must have dtype as a keyword argument if it has no positional arguments, e.g.: wp.vector\(length=5, dtype=wp.float32\)$",
221
62
  ):
222
- v2result = wp.vector(input[0], input[1])
223
- v3result = wp.vector(input[2], input[3], input[4])
224
- v4result = wp.vector(input[5], input[6], input[7], input[8])
225
- v5result = wp.vector(input[9], input[10], input[11], input[12], input[13])
226
-
227
- idx = 0
228
- for i in range(2):
229
- output[idx] = wptype(2) * v2result[i]
230
- idx = idx + 1
231
- for i in range(3):
232
- output[idx] = wptype(2) * v3result[i]
233
- idx = idx + 1
234
- for i in range(4):
235
- output[idx] = wptype(2) * v4result[i]
236
- idx = idx + 1
237
- for i in range(5):
238
- output[idx] = wptype(2) * v5result[i]
239
- idx = idx + 1
240
-
241
- scalar_kernel = getkernel(check_scalar_init, suffix=dtype.__name__)
242
- component_kernel = getkernel(check_component_init, suffix=dtype.__name__)
243
- output_select_kernel = get_select_kernel(wptype)
244
-
245
- if register_kernels:
246
- return
247
-
248
- input = wp.array(randvals([4], dtype), requires_grad=True, device=device)
249
- output = wp.zeros(2 + 3 + 4 + 5, dtype=wptype, requires_grad=True, device=device)
250
-
251
- wp.launch(scalar_kernel, dim=1, inputs=[input], outputs=[output], device=device)
252
-
253
- assert_np_equal(output.numpy()[:2], 2 * np.array([input.numpy()[0]] * 2), tol=1.0e-6)
254
- assert_np_equal(output.numpy()[2:5], 2 * np.array([input.numpy()[1]] * 3), tol=1.0e-6)
255
- assert_np_equal(output.numpy()[5:9], 2 * np.array([input.numpy()[2]] * 4), tol=1.0e-6)
256
- assert_np_equal(output.numpy()[9:], 2 * np.array([input.numpy()[3]] * 5), tol=1.0e-6)
257
-
258
- if dtype in np_float_types:
259
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
260
- for i in range(len(output)):
261
- tape = wp.Tape()
262
- with tape:
263
- wp.launch(scalar_kernel, dim=1, inputs=[input], outputs=[output], device=device)
264
- wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[out], device=device)
265
-
266
- tape.backward(loss=out)
267
- expected = np.zeros_like(input.numpy())
268
- if i < 2:
269
- expected[0] = 2
270
- elif i < 5:
271
- expected[1] = 2
272
- elif i < 9:
273
- expected[2] = 2
274
- else:
275
- expected[3] = 2
276
-
277
- assert_np_equal(tape.gradients[input].numpy(), expected, tol=tol)
278
-
279
- tape.reset()
280
- tape.zero()
281
-
282
- input = wp.array(randvals([2 + 3 + 4 + 5], dtype), requires_grad=True, device=device)
283
- output = wp.zeros(2 + 3 + 4 + 5, dtype=wptype, requires_grad=True, device=device)
284
-
285
- wp.launch(component_kernel, dim=1, inputs=[input], outputs=[output], device=device)
286
-
287
- assert_np_equal(output.numpy(), 2 * input.numpy(), tol=1.0e-6)
288
-
289
- if dtype in np_float_types:
290
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
291
- for i in range(len(output)):
292
- tape = wp.Tape()
293
- with tape:
294
- wp.launch(component_kernel, dim=1, inputs=[input], outputs=[output], device=device)
295
- wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[out], device=device)
296
-
297
- tape.backward(loss=out)
298
- expected = np.zeros_like(input.numpy())
299
- expected[i] = 2
300
-
301
- assert_np_equal(tape.gradients[input].numpy(), expected, tol=tol)
302
-
303
- tape.reset()
304
- tape.zero()
305
-
306
-
307
- def test_constants(test, device, dtype, register_kernels=False):
308
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
309
- vec2 = wp.types.vector(length=2, dtype=wptype)
310
- vec3 = wp.types.vector(length=3, dtype=wptype)
311
- vec4 = wp.types.vector(length=4, dtype=wptype)
312
- vec5 = wp.types.vector(length=5, dtype=wptype)
313
-
314
- cv2 = wp.constant(vec2(1, 2))
315
- cv3 = wp.constant(vec3(1, 2, 3))
316
- cv4 = wp.constant(vec4(1, 2, 3, 4))
317
- cv5 = wp.constant(vec5(1, 2, 3, 4, 5))
318
-
319
- def check_vector_constants():
320
- wp.expect_eq(cv2, vec2(wptype(1), wptype(2)))
321
- wp.expect_eq(cv3, vec3(wptype(1), wptype(2), wptype(3)))
322
- wp.expect_eq(cv4, vec4(wptype(1), wptype(2), wptype(3), wptype(4)))
323
- wp.expect_eq(cv5, vec5(wptype(1), wptype(2), wptype(3), wptype(4), wptype(5)))
324
-
325
- kernel = getkernel(check_vector_constants, suffix=dtype.__name__)
326
-
327
- if register_kernels:
328
- return
329
-
330
- wp.launch(kernel, dim=1, inputs=[])
331
-
332
-
333
- def test_constructors(test, device, dtype, register_kernels=False):
334
- np.random.seed(123)
63
+ wp.launch(
64
+ kernel,
65
+ dim=1,
66
+ inputs=[],
67
+ device=device,
68
+ )
335
69
 
336
- tol = {
337
- np.float16: 5.0e-3,
338
- np.float32: 1.0e-6,
339
- np.float64: 1.0e-8,
340
- }.get(dtype, 0)
341
70
 
342
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
343
- vec2 = wp.types.vector(length=2, dtype=wptype)
344
- vec3 = wp.types.vector(length=3, dtype=wptype)
345
- vec4 = wp.types.vector(length=4, dtype=wptype)
346
- vec5 = wp.types.vector(length=5, dtype=wptype)
71
+ def test_anon_constructor_error_length_mismatch(test, device):
72
+ @wp.kernel
73
+ def kernel():
74
+ wp.vector(
75
+ wp.vector(length=2, dtype=float),
76
+ length=3,
77
+ dtype=float,
78
+ )
347
79
 
348
- def check_scalar_constructor(
349
- input: wp.array(dtype=wptype),
350
- v2: wp.array(dtype=vec2),
351
- v3: wp.array(dtype=vec3),
352
- v4: wp.array(dtype=vec4),
353
- v5: wp.array(dtype=vec5),
354
- v20: wp.array(dtype=wptype),
355
- v21: wp.array(dtype=wptype),
356
- v30: wp.array(dtype=wptype),
357
- v31: wp.array(dtype=wptype),
358
- v32: wp.array(dtype=wptype),
359
- v40: wp.array(dtype=wptype),
360
- v41: wp.array(dtype=wptype),
361
- v42: wp.array(dtype=wptype),
362
- v43: wp.array(dtype=wptype),
363
- v50: wp.array(dtype=wptype),
364
- v51: wp.array(dtype=wptype),
365
- v52: wp.array(dtype=wptype),
366
- v53: wp.array(dtype=wptype),
367
- v54: wp.array(dtype=wptype),
80
+ with test.assertRaisesRegex(
81
+ RuntimeError,
82
+ r"Incompatible vector lengths for casting copy constructor, 3 vs 2$",
368
83
  ):
369
- v2result = vec2(input[0])
370
- v3result = vec3(input[0])
371
- v4result = vec4(input[0])
372
- v5result = vec5(input[0])
373
-
374
- v2[0] = v2result
375
- v3[0] = v3result
376
- v4[0] = v4result
377
- v5[0] = v5result
378
-
379
- # multiply outputs by 2 so we've got something to backpropagate
380
- v20[0] = wptype(2) * v2result[0]
381
- v21[0] = wptype(2) * v2result[1]
382
-
383
- v30[0] = wptype(2) * v3result[0]
384
- v31[0] = wptype(2) * v3result[1]
385
- v32[0] = wptype(2) * v3result[2]
84
+ wp.launch(
85
+ kernel,
86
+ dim=1,
87
+ inputs=[],
88
+ device=device,
89
+ )
386
90
 
387
- v40[0] = wptype(2) * v4result[0]
388
- v41[0] = wptype(2) * v4result[1]
389
- v42[0] = wptype(2) * v4result[2]
390
- v43[0] = wptype(2) * v4result[3]
391
91
 
392
- v50[0] = wptype(2) * v5result[0]
393
- v51[0] = wptype(2) * v5result[1]
394
- v52[0] = wptype(2) * v5result[2]
395
- v53[0] = wptype(2) * v5result[3]
396
- v54[0] = wptype(2) * v5result[4]
92
+ def test_anon_constructor_error_numeric_arg_missing_1(test, device):
93
+ @wp.kernel
94
+ def kernel():
95
+ wp.vector(1.0, 2.0, length=12345)
397
96
 
398
- def check_vector_constructors(
399
- input: wp.array(dtype=wptype),
400
- v2: wp.array(dtype=vec2),
401
- v3: wp.array(dtype=vec3),
402
- v4: wp.array(dtype=vec4),
403
- v5: wp.array(dtype=vec5),
404
- v20: wp.array(dtype=wptype),
405
- v21: wp.array(dtype=wptype),
406
- v30: wp.array(dtype=wptype),
407
- v31: wp.array(dtype=wptype),
408
- v32: wp.array(dtype=wptype),
409
- v40: wp.array(dtype=wptype),
410
- v41: wp.array(dtype=wptype),
411
- v42: wp.array(dtype=wptype),
412
- v43: wp.array(dtype=wptype),
413
- v50: wp.array(dtype=wptype),
414
- v51: wp.array(dtype=wptype),
415
- v52: wp.array(dtype=wptype),
416
- v53: wp.array(dtype=wptype),
417
- v54: wp.array(dtype=wptype),
97
+ with test.assertRaisesRegex(
98
+ RuntimeError,
99
+ r"vec\(\) must have one scalar argument or the dtype keyword argument if the length keyword argument is specified, e.g.: wp.vec\(1.0, length=5\)$",
418
100
  ):
419
- v2result = vec2(input[0], input[1])
420
- v3result = vec3(input[2], input[3], input[4])
421
- v4result = vec4(input[5], input[6], input[7], input[8])
422
- v5result = vec5(input[9], input[10], input[11], input[12], input[13])
423
-
424
- v2[0] = v2result
425
- v3[0] = v3result
426
- v4[0] = v4result
427
- v5[0] = v5result
428
-
429
- # multiply the output by 2 so we've got something to backpropagate:
430
- v20[0] = wptype(2) * v2result[0]
431
- v21[0] = wptype(2) * v2result[1]
432
-
433
- v30[0] = wptype(2) * v3result[0]
434
- v31[0] = wptype(2) * v3result[1]
435
- v32[0] = wptype(2) * v3result[2]
436
-
437
- v40[0] = wptype(2) * v4result[0]
438
- v41[0] = wptype(2) * v4result[1]
439
- v42[0] = wptype(2) * v4result[2]
440
- v43[0] = wptype(2) * v4result[3]
441
-
442
- v50[0] = wptype(2) * v5result[0]
443
- v51[0] = wptype(2) * v5result[1]
444
- v52[0] = wptype(2) * v5result[2]
445
- v53[0] = wptype(2) * v5result[3]
446
- v54[0] = wptype(2) * v5result[4]
447
-
448
- vec_kernel = getkernel(check_vector_constructors, suffix=dtype.__name__)
449
- kernel = getkernel(check_scalar_constructor, suffix=dtype.__name__)
450
-
451
- if register_kernels:
452
- return
453
-
454
- input = wp.array(randvals([1], dtype), requires_grad=True, device=device)
455
- v2 = wp.zeros(1, dtype=vec2, device=device)
456
- v3 = wp.zeros(1, dtype=vec3, device=device)
457
- v4 = wp.zeros(1, dtype=vec4, device=device)
458
- v5 = wp.zeros(1, dtype=vec5, device=device)
459
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
460
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
461
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
462
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
463
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
464
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
465
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
466
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
467
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
468
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
469
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
470
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
471
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
472
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
473
-
474
- tape = wp.Tape()
475
- with tape:
476
101
  wp.launch(
477
102
  kernel,
478
103
  dim=1,
479
- inputs=[input],
480
- outputs=[v2, v3, v4, v5, v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
104
+ inputs=[],
481
105
  device=device,
482
106
  )
483
107
 
484
- if dtype in np_float_types:
485
- for l in [v20, v21]:
486
- tape.backward(loss=l)
487
- test.assertEqual(tape.gradients[input].numpy()[0], 2.0)
488
- tape.zero()
489
-
490
- for l in [v30, v31, v32]:
491
- tape.backward(loss=l)
492
- test.assertEqual(tape.gradients[input].numpy()[0], 2.0)
493
- tape.zero()
494
-
495
- for l in [v40, v41, v42, v43]:
496
- tape.backward(loss=l)
497
- test.assertEqual(tape.gradients[input].numpy()[0], 2.0)
498
- tape.zero()
499
108
 
500
- for l in [v50, v51, v52, v53, v54]:
501
- tape.backward(loss=l)
502
- test.assertEqual(tape.gradients[input].numpy()[0], 2.0)
503
- tape.zero()
109
+ def test_anon_constructor_error_numeric_arg_missing_2(test, device):
110
+ @wp.kernel
111
+ def kernel():
112
+ wp.vector()
504
113
 
505
- val = input.numpy()[0]
506
- assert_np_equal(v2.numpy()[0], np.array([val, val]), tol=1.0e-6)
507
- assert_np_equal(v3.numpy()[0], np.array([val, val, val]), tol=1.0e-6)
508
- assert_np_equal(v4.numpy()[0], np.array([val, val, val, val]), tol=1.0e-6)
509
- assert_np_equal(v5.numpy()[0], np.array([val, val, val, val, val]), tol=1.0e-6)
510
-
511
- assert_np_equal(v20.numpy()[0], 2 * val, tol=1.0e-6)
512
- assert_np_equal(v21.numpy()[0], 2 * val, tol=1.0e-6)
513
- assert_np_equal(v30.numpy()[0], 2 * val, tol=1.0e-6)
514
- assert_np_equal(v31.numpy()[0], 2 * val, tol=1.0e-6)
515
- assert_np_equal(v32.numpy()[0], 2 * val, tol=1.0e-6)
516
- assert_np_equal(v40.numpy()[0], 2 * val, tol=1.0e-6)
517
- assert_np_equal(v41.numpy()[0], 2 * val, tol=1.0e-6)
518
- assert_np_equal(v42.numpy()[0], 2 * val, tol=1.0e-6)
519
- assert_np_equal(v43.numpy()[0], 2 * val, tol=1.0e-6)
520
- assert_np_equal(v50.numpy()[0], 2 * val, tol=1.0e-6)
521
- assert_np_equal(v51.numpy()[0], 2 * val, tol=1.0e-6)
522
- assert_np_equal(v52.numpy()[0], 2 * val, tol=1.0e-6)
523
- assert_np_equal(v53.numpy()[0], 2 * val, tol=1.0e-6)
524
- assert_np_equal(v54.numpy()[0], 2 * val, tol=1.0e-6)
525
-
526
- input = wp.array(randvals([14], dtype), requires_grad=True, device=device)
527
- tape = wp.Tape()
528
- with tape:
114
+ with test.assertRaisesRegex(
115
+ RuntimeError,
116
+ r"vec\(\) must have at least one numeric argument, if it's length, dtype is not specified$",
117
+ ):
529
118
  wp.launch(
530
- vec_kernel,
119
+ kernel,
531
120
  dim=1,
532
- inputs=[input],
533
- outputs=[v2, v3, v4, v5, v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
121
+ inputs=[],
534
122
  device=device,
535
123
  )
536
124
 
537
- if dtype in np_float_types:
538
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
539
- tape.backward(loss=l)
540
- grad = tape.gradients[input].numpy()
541
- expected_grad = np.zeros_like(grad)
542
- expected_grad[i] = 2
543
- assert_np_equal(grad, expected_grad, tol=tol)
544
- tape.zero()
545
-
546
- assert_np_equal(v2.numpy()[0, 0], input.numpy()[0], tol=tol)
547
- assert_np_equal(v2.numpy()[0, 1], input.numpy()[1], tol=tol)
548
- assert_np_equal(v3.numpy()[0, 0], input.numpy()[2], tol=tol)
549
- assert_np_equal(v3.numpy()[0, 1], input.numpy()[3], tol=tol)
550
- assert_np_equal(v3.numpy()[0, 2], input.numpy()[4], tol=tol)
551
- assert_np_equal(v4.numpy()[0, 0], input.numpy()[5], tol=tol)
552
- assert_np_equal(v4.numpy()[0, 1], input.numpy()[6], tol=tol)
553
- assert_np_equal(v4.numpy()[0, 2], input.numpy()[7], tol=tol)
554
- assert_np_equal(v4.numpy()[0, 3], input.numpy()[8], tol=tol)
555
- assert_np_equal(v5.numpy()[0, 0], input.numpy()[9], tol=tol)
556
- assert_np_equal(v5.numpy()[0, 1], input.numpy()[10], tol=tol)
557
- assert_np_equal(v5.numpy()[0, 2], input.numpy()[11], tol=tol)
558
- assert_np_equal(v5.numpy()[0, 3], input.numpy()[12], tol=tol)
559
- assert_np_equal(v5.numpy()[0, 4], input.numpy()[13], tol=tol)
560
-
561
- assert_np_equal(v20.numpy()[0], 2 * input.numpy()[0], tol=tol)
562
- assert_np_equal(v21.numpy()[0], 2 * input.numpy()[1], tol=tol)
563
- assert_np_equal(v30.numpy()[0], 2 * input.numpy()[2], tol=tol)
564
- assert_np_equal(v31.numpy()[0], 2 * input.numpy()[3], tol=tol)
565
- assert_np_equal(v32.numpy()[0], 2 * input.numpy()[4], tol=tol)
566
- assert_np_equal(v40.numpy()[0], 2 * input.numpy()[5], tol=tol)
567
- assert_np_equal(v41.numpy()[0], 2 * input.numpy()[6], tol=tol)
568
- assert_np_equal(v42.numpy()[0], 2 * input.numpy()[7], tol=tol)
569
- assert_np_equal(v43.numpy()[0], 2 * input.numpy()[8], tol=tol)
570
- assert_np_equal(v50.numpy()[0], 2 * input.numpy()[9], tol=tol)
571
- assert_np_equal(v51.numpy()[0], 2 * input.numpy()[10], tol=tol)
572
- assert_np_equal(v52.numpy()[0], 2 * input.numpy()[11], tol=tol)
573
- assert_np_equal(v53.numpy()[0], 2 * input.numpy()[12], tol=tol)
574
- assert_np_equal(v54.numpy()[0], 2 * input.numpy()[13], tol=tol)
575
-
576
-
577
- def test_indexing(test, device, dtype, register_kernels=False):
578
- np.random.seed(123)
579
-
580
- tol = {
581
- np.float16: 5.0e-3,
582
- np.float32: 1.0e-6,
583
- np.float64: 1.0e-8,
584
- }.get(dtype, 0)
585
125
 
586
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
587
- vec2 = wp.types.vector(length=2, dtype=wptype)
588
- vec3 = wp.types.vector(length=3, dtype=wptype)
589
- vec4 = wp.types.vector(length=4, dtype=wptype)
590
- vec5 = wp.types.vector(length=5, dtype=wptype)
126
+ def test_anon_constructor_error_dtype_keyword_extraneous(test, device):
127
+ @wp.kernel
128
+ def kernel():
129
+ wp.vector(1.0, 2.0, 3.0, dtype=float)
591
130
 
592
- def check_indexing(
593
- v2: wp.array(dtype=vec2),
594
- v3: wp.array(dtype=vec3),
595
- v4: wp.array(dtype=vec4),
596
- v5: wp.array(dtype=vec5),
597
- v20: wp.array(dtype=wptype),
598
- v21: wp.array(dtype=wptype),
599
- v30: wp.array(dtype=wptype),
600
- v31: wp.array(dtype=wptype),
601
- v32: wp.array(dtype=wptype),
602
- v40: wp.array(dtype=wptype),
603
- v41: wp.array(dtype=wptype),
604
- v42: wp.array(dtype=wptype),
605
- v43: wp.array(dtype=wptype),
606
- v50: wp.array(dtype=wptype),
607
- v51: wp.array(dtype=wptype),
608
- v52: wp.array(dtype=wptype),
609
- v53: wp.array(dtype=wptype),
610
- v54: wp.array(dtype=wptype),
611
- ):
612
- # multiply outputs by 2 so we've got something to backpropagate:
613
- v20[0] = wptype(2) * v2[0][0]
614
- v21[0] = wptype(2) * v2[0][1]
615
-
616
- v30[0] = wptype(2) * v3[0][0]
617
- v31[0] = wptype(2) * v3[0][1]
618
- v32[0] = wptype(2) * v3[0][2]
619
-
620
- v40[0] = wptype(2) * v4[0][0]
621
- v41[0] = wptype(2) * v4[0][1]
622
- v42[0] = wptype(2) * v4[0][2]
623
- v43[0] = wptype(2) * v4[0][3]
624
-
625
- v50[0] = wptype(2) * v5[0][0]
626
- v51[0] = wptype(2) * v5[0][1]
627
- v52[0] = wptype(2) * v5[0][2]
628
- v53[0] = wptype(2) * v5[0][3]
629
- v54[0] = wptype(2) * v5[0][4]
630
-
631
- kernel = getkernel(check_indexing, suffix=dtype.__name__)
632
-
633
- if register_kernels:
634
- return
635
-
636
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
637
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
638
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
639
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
640
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
641
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
642
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
643
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
644
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
645
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
646
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
647
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
648
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
649
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
650
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
651
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
652
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
653
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
654
-
655
- tape = wp.Tape()
656
- with tape:
657
- wp.launch(
658
- kernel,
659
- dim=1,
660
- inputs=[v2, v3, v4, v5],
661
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
662
- device=device,
663
- )
664
-
665
- if dtype in np_float_types:
666
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
667
- tape.backward(loss=l)
668
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
669
- expected_grads = np.zeros_like(allgrads)
670
- expected_grads[i] = 2
671
- assert_np_equal(allgrads, expected_grads, tol=tol)
672
- tape.zero()
673
-
674
- assert_np_equal(v20.numpy()[0], 2.0 * v2.numpy()[0, 0], tol=tol)
675
- assert_np_equal(v21.numpy()[0], 2.0 * v2.numpy()[0, 1], tol=tol)
676
- assert_np_equal(v30.numpy()[0], 2.0 * v3.numpy()[0, 0], tol=tol)
677
- assert_np_equal(v31.numpy()[0], 2.0 * v3.numpy()[0, 1], tol=tol)
678
- assert_np_equal(v32.numpy()[0], 2.0 * v3.numpy()[0, 2], tol=tol)
679
- assert_np_equal(v40.numpy()[0], 2.0 * v4.numpy()[0, 0], tol=tol)
680
- assert_np_equal(v41.numpy()[0], 2.0 * v4.numpy()[0, 1], tol=tol)
681
- assert_np_equal(v42.numpy()[0], 2.0 * v4.numpy()[0, 2], tol=tol)
682
- assert_np_equal(v43.numpy()[0], 2.0 * v4.numpy()[0, 3], tol=tol)
683
- assert_np_equal(v50.numpy()[0], 2.0 * v5.numpy()[0, 0], tol=tol)
684
- assert_np_equal(v51.numpy()[0], 2.0 * v5.numpy()[0, 1], tol=tol)
685
- assert_np_equal(v52.numpy()[0], 2.0 * v5.numpy()[0, 2], tol=tol)
686
- assert_np_equal(v53.numpy()[0], 2.0 * v5.numpy()[0, 3], tol=tol)
687
- assert_np_equal(v54.numpy()[0], 2.0 * v5.numpy()[0, 4], tol=tol)
688
-
689
-
690
- def test_equality(test, device, dtype, register_kernels=False):
691
- np.random.seed(123)
692
-
693
- tol = {
694
- np.float16: 1.0e-3,
695
- np.float32: 1.0e-6,
696
- np.float64: 1.0e-8,
697
- }.get(dtype, 0)
698
-
699
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
700
- vec2 = wp.types.vector(length=2, dtype=wptype)
701
- vec3 = wp.types.vector(length=3, dtype=wptype)
702
- vec4 = wp.types.vector(length=4, dtype=wptype)
703
- vec5 = wp.types.vector(length=5, dtype=wptype)
704
-
705
- def check_equality(
706
- v20: wp.array(dtype=vec2),
707
- v21: wp.array(dtype=vec2),
708
- v22: wp.array(dtype=vec2),
709
- v30: wp.array(dtype=vec3),
710
- v31: wp.array(dtype=vec3),
711
- v32: wp.array(dtype=vec3),
712
- v33: wp.array(dtype=vec3),
713
- v40: wp.array(dtype=vec4),
714
- v41: wp.array(dtype=vec4),
715
- v42: wp.array(dtype=vec4),
716
- v43: wp.array(dtype=vec4),
717
- v44: wp.array(dtype=vec4),
718
- v50: wp.array(dtype=vec5),
719
- v51: wp.array(dtype=vec5),
720
- v52: wp.array(dtype=vec5),
721
- v53: wp.array(dtype=vec5),
722
- v54: wp.array(dtype=vec5),
723
- v55: wp.array(dtype=vec5),
724
- ):
725
- wp.expect_eq(v20[0], v20[0])
726
- wp.expect_neq(v21[0], v20[0])
727
- wp.expect_neq(v22[0], v20[0])
728
-
729
- wp.expect_eq(v30[0], v30[0])
730
- wp.expect_neq(v31[0], v30[0])
731
- wp.expect_neq(v32[0], v30[0])
732
- wp.expect_neq(v33[0], v30[0])
733
-
734
- wp.expect_eq(v40[0], v40[0])
735
- wp.expect_neq(v41[0], v40[0])
736
- wp.expect_neq(v42[0], v40[0])
737
- wp.expect_neq(v43[0], v40[0])
738
- wp.expect_neq(v44[0], v40[0])
739
-
740
- wp.expect_eq(v50[0], v50[0])
741
- wp.expect_neq(v51[0], v50[0])
742
- wp.expect_neq(v52[0], v50[0])
743
- wp.expect_neq(v53[0], v50[0])
744
- wp.expect_neq(v54[0], v50[0])
745
- wp.expect_neq(v55[0], v50[0])
746
-
747
- kernel = getkernel(check_equality, suffix=dtype.__name__)
748
-
749
- if register_kernels:
750
- return
751
-
752
- v20 = wp.array([1.0, 2.0], dtype=vec2, requires_grad=True, device=device)
753
- v21 = wp.array([1.0, 3.0], dtype=vec2, requires_grad=True, device=device)
754
- v22 = wp.array([3.0, 2.0], dtype=vec2, requires_grad=True, device=device)
755
-
756
- v30 = wp.array([1.0, 2.0, 3.0], dtype=vec3, requires_grad=True, device=device)
757
- v31 = wp.array([-1.0, 2.0, 3.0], dtype=vec3, requires_grad=True, device=device)
758
- v32 = wp.array([1.0, -2.0, 3.0], dtype=vec3, requires_grad=True, device=device)
759
- v33 = wp.array([1.0, 2.0, -3.0], dtype=vec3, requires_grad=True, device=device)
760
-
761
- v40 = wp.array([1.0, 2.0, 3.0, 4.0], dtype=vec4, requires_grad=True, device=device)
762
- v41 = wp.array([-1.0, 2.0, 3.0, 4.0], dtype=vec4, requires_grad=True, device=device)
763
- v42 = wp.array([1.0, -2.0, 3.0, 4.0], dtype=vec4, requires_grad=True, device=device)
764
- v43 = wp.array([1.0, 2.0, -3.0, 4.0], dtype=vec4, requires_grad=True, device=device)
765
- v44 = wp.array([1.0, 2.0, 3.0, -4.0], dtype=vec4, requires_grad=True, device=device)
766
-
767
- v50 = wp.array([1.0, 2.0, 3.0, 4.0, 5.0], dtype=vec5, requires_grad=True, device=device)
768
- v51 = wp.array([-1.0, 2.0, 3.0, 4.0, 5.0], dtype=vec5, requires_grad=True, device=device)
769
- v52 = wp.array([1.0, -2.0, 3.0, 4.0, 5.0], dtype=vec5, requires_grad=True, device=device)
770
- v53 = wp.array([1.0, 2.0, -3.0, 4.0, 5.0], dtype=vec5, requires_grad=True, device=device)
771
- v54 = wp.array([1.0, 2.0, 3.0, -4.0, 5.0], dtype=vec5, requires_grad=True, device=device)
772
- v55 = wp.array([1.0, 2.0, 3.0, 4.0, -5.0], dtype=vec5, requires_grad=True, device=device)
773
- wp.launch(
774
- kernel,
775
- dim=1,
776
- inputs=[
777
- v20,
778
- v21,
779
- v22,
780
- v30,
781
- v31,
782
- v32,
783
- v33,
784
- v40,
785
- v41,
786
- v42,
787
- v43,
788
- v44,
789
- v50,
790
- v51,
791
- v52,
792
- v53,
793
- v54,
794
- v55,
795
- ],
796
- outputs=[],
797
- device=device,
798
- )
799
-
800
-
801
- def test_negation(test, device, dtype, register_kernels=False):
802
- np.random.seed(123)
803
-
804
- tol = {
805
- np.float16: 5.0e-3,
806
- np.float32: 1.0e-6,
807
- np.float64: 1.0e-8,
808
- }.get(dtype, 0)
809
-
810
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
811
- vec2 = wp.types.vector(length=2, dtype=wptype)
812
- vec3 = wp.types.vector(length=3, dtype=wptype)
813
- vec4 = wp.types.vector(length=4, dtype=wptype)
814
- vec5 = wp.types.vector(length=5, dtype=wptype)
815
-
816
- def check_negation(
817
- v2: wp.array(dtype=vec2),
818
- v3: wp.array(dtype=vec3),
819
- v4: wp.array(dtype=vec4),
820
- v5: wp.array(dtype=vec5),
821
- v2out: wp.array(dtype=vec2),
822
- v3out: wp.array(dtype=vec3),
823
- v4out: wp.array(dtype=vec4),
824
- v5out: wp.array(dtype=vec5),
825
- v20: wp.array(dtype=wptype),
826
- v21: wp.array(dtype=wptype),
827
- v30: wp.array(dtype=wptype),
828
- v31: wp.array(dtype=wptype),
829
- v32: wp.array(dtype=wptype),
830
- v40: wp.array(dtype=wptype),
831
- v41: wp.array(dtype=wptype),
832
- v42: wp.array(dtype=wptype),
833
- v43: wp.array(dtype=wptype),
834
- v50: wp.array(dtype=wptype),
835
- v51: wp.array(dtype=wptype),
836
- v52: wp.array(dtype=wptype),
837
- v53: wp.array(dtype=wptype),
838
- v54: wp.array(dtype=wptype),
839
- ):
840
- v2result = -v2[0]
841
- v3result = -v3[0]
842
- v4result = -v4[0]
843
- v5result = -v5[0]
844
-
845
- v2out[0] = v2result
846
- v3out[0] = v3result
847
- v4out[0] = v4result
848
- v5out[0] = v5result
849
-
850
- # multiply these outputs by 2 so we've got something to backpropagate:
851
- v20[0] = wptype(2) * v2result[0]
852
- v21[0] = wptype(2) * v2result[1]
853
-
854
- v30[0] = wptype(2) * v3result[0]
855
- v31[0] = wptype(2) * v3result[1]
856
- v32[0] = wptype(2) * v3result[2]
857
-
858
- v40[0] = wptype(2) * v4result[0]
859
- v41[0] = wptype(2) * v4result[1]
860
- v42[0] = wptype(2) * v4result[2]
861
- v43[0] = wptype(2) * v4result[3]
862
-
863
- v50[0] = wptype(2) * v5result[0]
864
- v51[0] = wptype(2) * v5result[1]
865
- v52[0] = wptype(2) * v5result[2]
866
- v53[0] = wptype(2) * v5result[3]
867
- v54[0] = wptype(2) * v5result[4]
868
-
869
- kernel = getkernel(check_negation, suffix=dtype.__name__)
870
-
871
- if register_kernels:
872
- return
873
-
874
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
875
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
876
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
877
- v5_np = randvals((1, 5), dtype)
878
- v5 = wp.array(v5_np, dtype=vec5, requires_grad=True, device=device)
879
-
880
- v2out = wp.zeros(1, dtype=vec2, device=device)
881
- v3out = wp.zeros(1, dtype=vec3, device=device)
882
- v4out = wp.zeros(1, dtype=vec4, device=device)
883
- v5out = wp.zeros(1, dtype=vec5, device=device)
884
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
885
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
886
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
887
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
888
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
889
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
890
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
891
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
892
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
893
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
894
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
895
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
896
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
897
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
898
-
899
- tape = wp.Tape()
900
- with tape:
901
- wp.launch(
902
- kernel,
903
- dim=1,
904
- inputs=[v2, v3, v4, v5],
905
- outputs=[v2out, v3out, v4out, v5out, v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
906
- device=device,
907
- )
908
-
909
- if dtype in np_float_types:
910
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
911
- tape.backward(loss=l)
912
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
913
- expected_grads = np.zeros_like(allgrads)
914
- expected_grads[i] = -2
915
- assert_np_equal(allgrads, expected_grads, tol=tol)
916
- tape.zero()
917
-
918
- assert_np_equal(v2out.numpy()[0], -v2.numpy()[0], tol=tol)
919
- assert_np_equal(v3out.numpy()[0], -v3.numpy()[0], tol=tol)
920
- assert_np_equal(v4out.numpy()[0], -v4.numpy()[0], tol=tol)
921
- assert_np_equal(v5out.numpy()[0], -v5.numpy()[0], tol=tol)
922
-
923
-
924
- def test_scalar_multiplication(test, device, dtype, register_kernels=False):
925
- np.random.seed(123)
926
-
927
- tol = {
928
- np.float16: 5.0e-3,
929
- np.float32: 1.0e-6,
930
- np.float64: 1.0e-8,
931
- }.get(dtype, 0)
932
-
933
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
934
- vec2 = wp.types.vector(length=2, dtype=wptype)
935
- vec3 = wp.types.vector(length=3, dtype=wptype)
936
- vec4 = wp.types.vector(length=4, dtype=wptype)
937
- vec5 = wp.types.vector(length=5, dtype=wptype)
938
-
939
- def check_mul(
940
- s: wp.array(dtype=wptype),
941
- v2: wp.array(dtype=vec2),
942
- v3: wp.array(dtype=vec3),
943
- v4: wp.array(dtype=vec4),
944
- v5: wp.array(dtype=vec5),
945
- v20: wp.array(dtype=wptype),
946
- v21: wp.array(dtype=wptype),
947
- v30: wp.array(dtype=wptype),
948
- v31: wp.array(dtype=wptype),
949
- v32: wp.array(dtype=wptype),
950
- v40: wp.array(dtype=wptype),
951
- v41: wp.array(dtype=wptype),
952
- v42: wp.array(dtype=wptype),
953
- v43: wp.array(dtype=wptype),
954
- v50: wp.array(dtype=wptype),
955
- v51: wp.array(dtype=wptype),
956
- v52: wp.array(dtype=wptype),
957
- v53: wp.array(dtype=wptype),
958
- v54: wp.array(dtype=wptype),
959
- ):
960
- v2result = s[0] * v2[0]
961
- v3result = s[0] * v3[0]
962
- v4result = s[0] * v4[0]
963
- v5result = s[0] * v5[0]
964
-
965
- # multiply outputs by 2 so we've got something to backpropagate:
966
- v20[0] = wptype(2) * v2result[0]
967
- v21[0] = wptype(2) * v2result[1]
968
-
969
- v30[0] = wptype(2) * v3result[0]
970
- v31[0] = wptype(2) * v3result[1]
971
- v32[0] = wptype(2) * v3result[2]
972
-
973
- v40[0] = wptype(2) * v4result[0]
974
- v41[0] = wptype(2) * v4result[1]
975
- v42[0] = wptype(2) * v4result[2]
976
- v43[0] = wptype(2) * v4result[3]
977
-
978
- v50[0] = wptype(2) * v5result[0]
979
- v51[0] = wptype(2) * v5result[1]
980
- v52[0] = wptype(2) * v5result[2]
981
- v53[0] = wptype(2) * v5result[3]
982
- v54[0] = wptype(2) * v5result[4]
983
-
984
- kernel = getkernel(check_mul, suffix=dtype.__name__)
985
-
986
- if register_kernels:
987
- return
988
-
989
- s = wp.array(randvals([1], dtype), requires_grad=True, device=device)
990
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
991
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
992
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
993
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
994
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
995
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
996
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
997
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
998
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
999
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1000
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1001
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1002
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1003
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1004
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1005
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1006
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1007
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1008
- tape = wp.Tape()
1009
- with tape:
1010
- wp.launch(
1011
- kernel,
1012
- dim=1,
1013
- inputs=[
1014
- s,
1015
- v2,
1016
- v3,
1017
- v4,
1018
- v5,
1019
- ],
1020
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
1021
- device=device,
1022
- )
1023
-
1024
- assert_np_equal(v20.numpy()[0], 2 * s.numpy()[0] * v2.numpy()[0, 0], tol=tol)
1025
- assert_np_equal(v21.numpy()[0], 2 * s.numpy()[0] * v2.numpy()[0, 1], tol=tol)
1026
-
1027
- assert_np_equal(v30.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 0], tol=10 * tol)
1028
- assert_np_equal(v31.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 1], tol=10 * tol)
1029
- assert_np_equal(v32.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 2], tol=10 * tol)
1030
-
1031
- assert_np_equal(v40.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 0], tol=10 * tol)
1032
- assert_np_equal(v41.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 1], tol=10 * tol)
1033
- assert_np_equal(v42.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 2], tol=10 * tol)
1034
- assert_np_equal(v43.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 3], tol=10 * tol)
1035
-
1036
- assert_np_equal(v50.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 0], tol=10 * tol)
1037
- assert_np_equal(v51.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 1], tol=10 * tol)
1038
- assert_np_equal(v52.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 2], tol=10 * tol)
1039
- assert_np_equal(v53.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 3], tol=10 * tol)
1040
- assert_np_equal(v54.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 4], tol=10 * tol)
1041
-
1042
- incmps = np.concatenate([v.numpy()[0] for v in [v2, v3, v4, v5]])
1043
-
1044
- if dtype in np_float_types:
1045
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43]):
1046
- tape.backward(loss=l)
1047
- sgrad = tape.gradients[s].numpy()[0]
1048
- assert_np_equal(sgrad, 2 * incmps[i], tol=10 * tol)
1049
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4]])
1050
- expected_grads = np.zeros_like(allgrads)
1051
- expected_grads[i] = s.numpy()[0] * 2
1052
- assert_np_equal(allgrads, expected_grads, tol=10 * tol)
1053
- tape.zero()
1054
-
1055
-
1056
- def test_scalar_multiplication_rightmul(test, device, dtype, register_kernels=False):
1057
- np.random.seed(123)
1058
-
1059
- tol = {
1060
- np.float16: 5.0e-3,
1061
- np.float32: 1.0e-6,
1062
- np.float64: 1.0e-8,
1063
- }.get(dtype, 0)
1064
-
1065
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1066
- vec2 = wp.types.vector(length=2, dtype=wptype)
1067
- vec3 = wp.types.vector(length=3, dtype=wptype)
1068
- vec4 = wp.types.vector(length=4, dtype=wptype)
1069
- vec5 = wp.types.vector(length=5, dtype=wptype)
1070
-
1071
- def check_rightmul(
1072
- s: wp.array(dtype=wptype),
1073
- v2: wp.array(dtype=vec2),
1074
- v3: wp.array(dtype=vec3),
1075
- v4: wp.array(dtype=vec4),
1076
- v5: wp.array(dtype=vec5),
1077
- v20: wp.array(dtype=wptype),
1078
- v21: wp.array(dtype=wptype),
1079
- v30: wp.array(dtype=wptype),
1080
- v31: wp.array(dtype=wptype),
1081
- v32: wp.array(dtype=wptype),
1082
- v40: wp.array(dtype=wptype),
1083
- v41: wp.array(dtype=wptype),
1084
- v42: wp.array(dtype=wptype),
1085
- v43: wp.array(dtype=wptype),
1086
- v50: wp.array(dtype=wptype),
1087
- v51: wp.array(dtype=wptype),
1088
- v52: wp.array(dtype=wptype),
1089
- v53: wp.array(dtype=wptype),
1090
- v54: wp.array(dtype=wptype),
1091
- ):
1092
- v2result = v2[0] * s[0]
1093
- v3result = v3[0] * s[0]
1094
- v4result = v4[0] * s[0]
1095
- v5result = v5[0] * s[0]
1096
-
1097
- # multiply outputs by 2 so we've got something to backpropagate:
1098
- v20[0] = wptype(2) * v2result[0]
1099
- v21[0] = wptype(2) * v2result[1]
1100
-
1101
- v30[0] = wptype(2) * v3result[0]
1102
- v31[0] = wptype(2) * v3result[1]
1103
- v32[0] = wptype(2) * v3result[2]
1104
-
1105
- v40[0] = wptype(2) * v4result[0]
1106
- v41[0] = wptype(2) * v4result[1]
1107
- v42[0] = wptype(2) * v4result[2]
1108
- v43[0] = wptype(2) * v4result[3]
1109
-
1110
- v50[0] = wptype(2) * v5result[0]
1111
- v51[0] = wptype(2) * v5result[1]
1112
- v52[0] = wptype(2) * v5result[2]
1113
- v53[0] = wptype(2) * v5result[3]
1114
- v54[0] = wptype(2) * v5result[4]
1115
-
1116
- kernel = getkernel(check_rightmul, suffix=dtype.__name__)
1117
-
1118
- if register_kernels:
1119
- return
1120
-
1121
- s = wp.array(randvals([1], dtype), requires_grad=True, device=device)
1122
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1123
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1124
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1125
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1126
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1127
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1128
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1129
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1130
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1131
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1132
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1133
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1134
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1135
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1136
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1137
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1138
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1139
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1140
- tape = wp.Tape()
1141
- with tape:
1142
- wp.launch(
1143
- kernel,
1144
- dim=1,
1145
- inputs=[
1146
- s,
1147
- v2,
1148
- v3,
1149
- v4,
1150
- v5,
1151
- ],
1152
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
1153
- device=device,
1154
- )
1155
-
1156
- assert_np_equal(v20.numpy()[0], 2 * s.numpy()[0] * v2.numpy()[0, 0], tol=tol)
1157
- assert_np_equal(v21.numpy()[0], 2 * s.numpy()[0] * v2.numpy()[0, 1], tol=tol)
1158
-
1159
- assert_np_equal(v30.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 0], tol=10 * tol)
1160
- assert_np_equal(v31.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 1], tol=10 * tol)
1161
- assert_np_equal(v32.numpy()[0], 2 * s.numpy()[0] * v3.numpy()[0, 2], tol=10 * tol)
1162
-
1163
- assert_np_equal(v40.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 0], tol=10 * tol)
1164
- assert_np_equal(v41.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 1], tol=10 * tol)
1165
- assert_np_equal(v42.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 2], tol=10 * tol)
1166
- assert_np_equal(v43.numpy()[0], 2 * s.numpy()[0] * v4.numpy()[0, 3], tol=10 * tol)
1167
-
1168
- assert_np_equal(v50.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 0], tol=10 * tol)
1169
- assert_np_equal(v51.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 1], tol=10 * tol)
1170
- assert_np_equal(v52.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 2], tol=10 * tol)
1171
- assert_np_equal(v53.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 3], tol=10 * tol)
1172
- assert_np_equal(v54.numpy()[0], 2 * s.numpy()[0] * v5.numpy()[0, 4], tol=10 * tol)
1173
-
1174
- incmps = np.concatenate([v.numpy()[0] for v in [v2, v3, v4, v5]])
1175
-
1176
- if dtype in np_float_types:
1177
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43]):
1178
- tape.backward(loss=l)
1179
- sgrad = tape.gradients[s].numpy()[0]
1180
- assert_np_equal(sgrad, 2 * incmps[i], tol=10 * tol)
1181
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4]])
1182
- expected_grads = np.zeros_like(allgrads)
1183
- expected_grads[i] = s.numpy()[0] * 2
1184
- assert_np_equal(allgrads, expected_grads, tol=10 * tol)
1185
- tape.zero()
1186
-
1187
-
1188
- def test_cw_multiplication(test, device, dtype, register_kernels=False):
1189
- np.random.seed(123)
1190
-
1191
- tol = {
1192
- np.float16: 5.0e-3,
1193
- np.float32: 1.0e-6,
1194
- np.float64: 1.0e-8,
1195
- }.get(dtype, 0)
1196
-
1197
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1198
- vec2 = wp.types.vector(length=2, dtype=wptype)
1199
- vec3 = wp.types.vector(length=3, dtype=wptype)
1200
- vec4 = wp.types.vector(length=4, dtype=wptype)
1201
- vec5 = wp.types.vector(length=5, dtype=wptype)
1202
-
1203
- def check_cw_mul(
1204
- s2: wp.array(dtype=vec2),
1205
- s3: wp.array(dtype=vec3),
1206
- s4: wp.array(dtype=vec4),
1207
- s5: wp.array(dtype=vec5),
1208
- v2: wp.array(dtype=vec2),
1209
- v3: wp.array(dtype=vec3),
1210
- v4: wp.array(dtype=vec4),
1211
- v5: wp.array(dtype=vec5),
1212
- v20: wp.array(dtype=wptype),
1213
- v21: wp.array(dtype=wptype),
1214
- v30: wp.array(dtype=wptype),
1215
- v31: wp.array(dtype=wptype),
1216
- v32: wp.array(dtype=wptype),
1217
- v40: wp.array(dtype=wptype),
1218
- v41: wp.array(dtype=wptype),
1219
- v42: wp.array(dtype=wptype),
1220
- v43: wp.array(dtype=wptype),
1221
- v50: wp.array(dtype=wptype),
1222
- v51: wp.array(dtype=wptype),
1223
- v52: wp.array(dtype=wptype),
1224
- v53: wp.array(dtype=wptype),
1225
- v54: wp.array(dtype=wptype),
1226
- ):
1227
- v2result = wp.cw_mul(s2[0], v2[0])
1228
- v3result = wp.cw_mul(s3[0], v3[0])
1229
- v4result = wp.cw_mul(s4[0], v4[0])
1230
- v5result = wp.cw_mul(s5[0], v5[0])
1231
-
1232
- v20[0] = wptype(2) * v2result[0]
1233
- v21[0] = wptype(2) * v2result[1]
1234
-
1235
- v30[0] = wptype(2) * v3result[0]
1236
- v31[0] = wptype(2) * v3result[1]
1237
- v32[0] = wptype(2) * v3result[2]
1238
-
1239
- v40[0] = wptype(2) * v4result[0]
1240
- v41[0] = wptype(2) * v4result[1]
1241
- v42[0] = wptype(2) * v4result[2]
1242
- v43[0] = wptype(2) * v4result[3]
1243
-
1244
- v50[0] = wptype(2) * v5result[0]
1245
- v51[0] = wptype(2) * v5result[1]
1246
- v52[0] = wptype(2) * v5result[2]
1247
- v53[0] = wptype(2) * v5result[3]
1248
- v54[0] = wptype(2) * v5result[4]
1249
-
1250
- kernel = getkernel(check_cw_mul, suffix=dtype.__name__)
1251
-
1252
- if register_kernels:
1253
- return
1254
-
1255
- s2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1256
- s3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1257
- s4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1258
- s5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1259
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1260
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1261
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1262
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1263
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1264
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1265
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1266
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1267
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1268
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1269
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1270
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1271
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1272
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1273
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1274
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1275
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1276
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1277
- tape = wp.Tape()
1278
- with tape:
1279
- wp.launch(
1280
- kernel,
1281
- dim=1,
1282
- inputs=[
1283
- s2,
1284
- s3,
1285
- s4,
1286
- s5,
1287
- v2,
1288
- v3,
1289
- v4,
1290
- v5,
1291
- ],
1292
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
1293
- device=device,
1294
- )
1295
-
1296
- assert_np_equal(v20.numpy()[0], 2 * s2.numpy()[0, 0] * v2.numpy()[0, 0], tol=10 * tol)
1297
- assert_np_equal(v21.numpy()[0], 2 * s2.numpy()[0, 1] * v2.numpy()[0, 1], tol=10 * tol)
1298
-
1299
- assert_np_equal(v30.numpy()[0], 2 * s3.numpy()[0, 0] * v3.numpy()[0, 0], tol=10 * tol)
1300
- assert_np_equal(v31.numpy()[0], 2 * s3.numpy()[0, 1] * v3.numpy()[0, 1], tol=10 * tol)
1301
- assert_np_equal(v32.numpy()[0], 2 * s3.numpy()[0, 2] * v3.numpy()[0, 2], tol=10 * tol)
1302
-
1303
- assert_np_equal(v40.numpy()[0], 2 * s4.numpy()[0, 0] * v4.numpy()[0, 0], tol=10 * tol)
1304
- assert_np_equal(v41.numpy()[0], 2 * s4.numpy()[0, 1] * v4.numpy()[0, 1], tol=10 * tol)
1305
- assert_np_equal(v42.numpy()[0], 2 * s4.numpy()[0, 2] * v4.numpy()[0, 2], tol=10 * tol)
1306
- assert_np_equal(v43.numpy()[0], 2 * s4.numpy()[0, 3] * v4.numpy()[0, 3], tol=10 * tol)
1307
-
1308
- assert_np_equal(v50.numpy()[0], 2 * s5.numpy()[0, 0] * v5.numpy()[0, 0], tol=10 * tol)
1309
- assert_np_equal(v51.numpy()[0], 2 * s5.numpy()[0, 1] * v5.numpy()[0, 1], tol=10 * tol)
1310
- assert_np_equal(v52.numpy()[0], 2 * s5.numpy()[0, 2] * v5.numpy()[0, 2], tol=10 * tol)
1311
- assert_np_equal(v53.numpy()[0], 2 * s5.numpy()[0, 3] * v5.numpy()[0, 3], tol=10 * tol)
1312
- assert_np_equal(v54.numpy()[0], 2 * s5.numpy()[0, 4] * v5.numpy()[0, 4], tol=10 * tol)
1313
-
1314
- incmps = np.concatenate([v.numpy()[0] for v in [v2, v3, v4, v5]])
1315
- scmps = np.concatenate([v.numpy()[0] for v in [s2, s3, s4, s5]])
1316
-
1317
- if dtype in np_float_types:
1318
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
1319
- tape.backward(loss=l)
1320
- sgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [s2, s3, s4, s5]])
1321
- expected_grads = np.zeros_like(sgrads)
1322
- expected_grads[i] = incmps[i] * 2
1323
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
1324
-
1325
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
1326
- expected_grads = np.zeros_like(allgrads)
1327
- expected_grads[i] = scmps[i] * 2
1328
- assert_np_equal(allgrads, expected_grads, tol=10 * tol)
1329
-
1330
- tape.zero()
1331
-
1332
-
1333
- def test_scalar_division(test, device, dtype, register_kernels=False):
1334
- np.random.seed(123)
1335
-
1336
- tol = {
1337
- np.float16: 5.0e-3,
1338
- np.float32: 1.0e-6,
1339
- np.float64: 1.0e-8,
1340
- }.get(dtype, 0)
1341
-
1342
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1343
- vec2 = wp.types.vector(length=2, dtype=wptype)
1344
- vec3 = wp.types.vector(length=3, dtype=wptype)
1345
- vec4 = wp.types.vector(length=4, dtype=wptype)
1346
- vec5 = wp.types.vector(length=5, dtype=wptype)
1347
-
1348
- def check_div(
1349
- s: wp.array(dtype=wptype),
1350
- v2: wp.array(dtype=vec2),
1351
- v3: wp.array(dtype=vec3),
1352
- v4: wp.array(dtype=vec4),
1353
- v5: wp.array(dtype=vec5),
1354
- v20: wp.array(dtype=wptype),
1355
- v21: wp.array(dtype=wptype),
1356
- v30: wp.array(dtype=wptype),
1357
- v31: wp.array(dtype=wptype),
1358
- v32: wp.array(dtype=wptype),
1359
- v40: wp.array(dtype=wptype),
1360
- v41: wp.array(dtype=wptype),
1361
- v42: wp.array(dtype=wptype),
1362
- v43: wp.array(dtype=wptype),
1363
- v50: wp.array(dtype=wptype),
1364
- v51: wp.array(dtype=wptype),
1365
- v52: wp.array(dtype=wptype),
1366
- v53: wp.array(dtype=wptype),
1367
- v54: wp.array(dtype=wptype),
131
+ with test.assertRaisesRegex(
132
+ RuntimeError,
133
+ r"vec\(\) should not have dtype specified if numeric arguments are given, the dtype will be inferred from the argument types$",
1368
134
  ):
1369
- v2result = v2[0] / s[0]
1370
- v3result = v3[0] / s[0]
1371
- v4result = v4[0] / s[0]
1372
- v5result = v5[0] / s[0]
1373
-
1374
- v20[0] = wptype(2) * v2result[0]
1375
- v21[0] = wptype(2) * v2result[1]
1376
-
1377
- v30[0] = wptype(2) * v3result[0]
1378
- v31[0] = wptype(2) * v3result[1]
1379
- v32[0] = wptype(2) * v3result[2]
1380
-
1381
- v40[0] = wptype(2) * v4result[0]
1382
- v41[0] = wptype(2) * v4result[1]
1383
- v42[0] = wptype(2) * v4result[2]
1384
- v43[0] = wptype(2) * v4result[3]
1385
-
1386
- v50[0] = wptype(2) * v5result[0]
1387
- v51[0] = wptype(2) * v5result[1]
1388
- v52[0] = wptype(2) * v5result[2]
1389
- v53[0] = wptype(2) * v5result[3]
1390
- v54[0] = wptype(2) * v5result[4]
1391
-
1392
- kernel = getkernel(check_div, suffix=dtype.__name__)
1393
-
1394
- if register_kernels:
1395
- return
1396
-
1397
- s = wp.array(randvals([1], dtype), requires_grad=True, device=device)
1398
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1399
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1400
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1401
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1402
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1403
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1404
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1405
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1406
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1407
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1408
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1409
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1410
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1411
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1412
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1413
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1414
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1415
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1416
- tape = wp.Tape()
1417
- with tape:
1418
135
  wp.launch(
1419
136
  kernel,
1420
137
  dim=1,
1421
- inputs=[
1422
- s,
1423
- v2,
1424
- v3,
1425
- v4,
1426
- v5,
1427
- ],
1428
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
138
+ inputs=[],
1429
139
  device=device,
1430
- )
1431
-
1432
- if dtype in np_int_types:
1433
- assert_np_equal(v20.numpy()[0], 2 * (v2.numpy()[0, 0] // (s.numpy()[0])), tol=tol)
1434
- assert_np_equal(v21.numpy()[0], 2 * (v2.numpy()[0, 1] // (s.numpy()[0])), tol=tol)
1435
-
1436
- assert_np_equal(v30.numpy()[0], 2 * (v3.numpy()[0, 0] // (s.numpy()[0])), tol=10 * tol)
1437
- assert_np_equal(v31.numpy()[0], 2 * (v3.numpy()[0, 1] // (s.numpy()[0])), tol=10 * tol)
1438
- assert_np_equal(v32.numpy()[0], 2 * (v3.numpy()[0, 2] // (s.numpy()[0])), tol=10 * tol)
1439
-
1440
- assert_np_equal(v40.numpy()[0], 2 * (v4.numpy()[0, 0] // (s.numpy()[0])), tol=10 * tol)
1441
- assert_np_equal(v41.numpy()[0], 2 * (v4.numpy()[0, 1] // (s.numpy()[0])), tol=10 * tol)
1442
- assert_np_equal(v42.numpy()[0], 2 * (v4.numpy()[0, 2] // (s.numpy()[0])), tol=10 * tol)
1443
- assert_np_equal(v43.numpy()[0], 2 * (v4.numpy()[0, 3] // (s.numpy()[0])), tol=10 * tol)
1444
-
1445
- assert_np_equal(v50.numpy()[0], 2 * (v5.numpy()[0, 0] // (s.numpy()[0])), tol=10 * tol)
1446
- assert_np_equal(v51.numpy()[0], 2 * (v5.numpy()[0, 1] // (s.numpy()[0])), tol=10 * tol)
1447
- assert_np_equal(v52.numpy()[0], 2 * (v5.numpy()[0, 2] // (s.numpy()[0])), tol=10 * tol)
1448
- assert_np_equal(v53.numpy()[0], 2 * (v5.numpy()[0, 3] // (s.numpy()[0])), tol=10 * tol)
1449
- assert_np_equal(v54.numpy()[0], 2 * (v5.numpy()[0, 4] // (s.numpy()[0])), tol=10 * tol)
1450
-
1451
- else:
1452
- assert_np_equal(v20.numpy()[0], 2 * v2.numpy()[0, 0] / (s.numpy()[0]), tol=tol)
1453
- assert_np_equal(v21.numpy()[0], 2 * v2.numpy()[0, 1] / (s.numpy()[0]), tol=tol)
1454
-
1455
- assert_np_equal(v30.numpy()[0], 2 * v3.numpy()[0, 0] / (s.numpy()[0]), tol=10 * tol)
1456
- assert_np_equal(v31.numpy()[0], 2 * v3.numpy()[0, 1] / (s.numpy()[0]), tol=10 * tol)
1457
- assert_np_equal(v32.numpy()[0], 2 * v3.numpy()[0, 2] / (s.numpy()[0]), tol=10 * tol)
1458
-
1459
- assert_np_equal(v40.numpy()[0], 2 * v4.numpy()[0, 0] / (s.numpy()[0]), tol=10 * tol)
1460
- assert_np_equal(v41.numpy()[0], 2 * v4.numpy()[0, 1] / (s.numpy()[0]), tol=10 * tol)
1461
- assert_np_equal(v42.numpy()[0], 2 * v4.numpy()[0, 2] / (s.numpy()[0]), tol=10 * tol)
1462
- assert_np_equal(v43.numpy()[0], 2 * v4.numpy()[0, 3] / (s.numpy()[0]), tol=10 * tol)
1463
-
1464
- assert_np_equal(v50.numpy()[0], 2 * v5.numpy()[0, 0] / (s.numpy()[0]), tol=10 * tol)
1465
- assert_np_equal(v51.numpy()[0], 2 * v5.numpy()[0, 1] / (s.numpy()[0]), tol=10 * tol)
1466
- assert_np_equal(v52.numpy()[0], 2 * v5.numpy()[0, 2] / (s.numpy()[0]), tol=10 * tol)
1467
- assert_np_equal(v53.numpy()[0], 2 * v5.numpy()[0, 3] / (s.numpy()[0]), tol=10 * tol)
1468
- assert_np_equal(v54.numpy()[0], 2 * v5.numpy()[0, 4] / (s.numpy()[0]), tol=10 * tol)
1469
-
1470
- incmps = np.concatenate([v.numpy()[0] for v in [v2, v3, v4, v5]])
1471
-
1472
- if dtype in np_float_types:
1473
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
1474
- tape.backward(loss=l)
1475
- sgrad = tape.gradients[s].numpy()[0]
1476
-
1477
- # d/ds v/s = -v/s^2
1478
- assert_np_equal(sgrad, -2 * incmps[i] / (s.numpy()[0] * s.numpy()[0]), tol=10 * tol)
1479
-
1480
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
1481
- expected_grads = np.zeros_like(allgrads)
1482
- expected_grads[i] = 2 / s.numpy()[0]
1483
-
1484
- # d/dv v/s = 1/s
1485
- assert_np_equal(allgrads, expected_grads, tol=tol)
1486
- tape.zero()
1487
-
1488
-
1489
- def test_cw_division(test, device, dtype, register_kernels=False):
1490
- np.random.seed(123)
1491
-
1492
- tol = {
1493
- np.float16: 1.0e-2,
1494
- np.float32: 1.0e-6,
1495
- np.float64: 1.0e-8,
1496
- }.get(dtype, 0)
1497
-
1498
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1499
- vec2 = wp.types.vector(length=2, dtype=wptype)
1500
- vec3 = wp.types.vector(length=3, dtype=wptype)
1501
- vec4 = wp.types.vector(length=4, dtype=wptype)
1502
- vec5 = wp.types.vector(length=5, dtype=wptype)
1503
-
1504
- def check_cw_div(
1505
- s2: wp.array(dtype=vec2),
1506
- s3: wp.array(dtype=vec3),
1507
- s4: wp.array(dtype=vec4),
1508
- s5: wp.array(dtype=vec5),
1509
- v2: wp.array(dtype=vec2),
1510
- v3: wp.array(dtype=vec3),
1511
- v4: wp.array(dtype=vec4),
1512
- v5: wp.array(dtype=vec5),
1513
- v20: wp.array(dtype=wptype),
1514
- v21: wp.array(dtype=wptype),
1515
- v30: wp.array(dtype=wptype),
1516
- v31: wp.array(dtype=wptype),
1517
- v32: wp.array(dtype=wptype),
1518
- v40: wp.array(dtype=wptype),
1519
- v41: wp.array(dtype=wptype),
1520
- v42: wp.array(dtype=wptype),
1521
- v43: wp.array(dtype=wptype),
1522
- v50: wp.array(dtype=wptype),
1523
- v51: wp.array(dtype=wptype),
1524
- v52: wp.array(dtype=wptype),
1525
- v53: wp.array(dtype=wptype),
1526
- v54: wp.array(dtype=wptype),
1527
- ):
1528
- v2result = wp.cw_div(v2[0], s2[0])
1529
- v3result = wp.cw_div(v3[0], s3[0])
1530
- v4result = wp.cw_div(v4[0], s4[0])
1531
- v5result = wp.cw_div(v5[0], s5[0])
1532
-
1533
- v20[0] = wptype(2) * v2result[0]
1534
- v21[0] = wptype(2) * v2result[1]
1535
-
1536
- v30[0] = wptype(2) * v3result[0]
1537
- v31[0] = wptype(2) * v3result[1]
1538
- v32[0] = wptype(2) * v3result[2]
140
+ )
1539
141
 
1540
- v40[0] = wptype(2) * v4result[0]
1541
- v41[0] = wptype(2) * v4result[1]
1542
- v42[0] = wptype(2) * v4result[2]
1543
- v43[0] = wptype(2) * v4result[3]
1544
142
 
1545
- v50[0] = wptype(2) * v5result[0]
1546
- v51[0] = wptype(2) * v5result[1]
1547
- v52[0] = wptype(2) * v5result[2]
1548
- v53[0] = wptype(2) * v5result[3]
1549
- v54[0] = wptype(2) * v5result[4]
143
+ def test_anon_constructor_error_numeric_args_mismatch(test, device):
144
+ @wp.kernel
145
+ def kernel():
146
+ wp.vector(1.0, 2)
1550
147
 
1551
- kernel = getkernel(check_cw_div, suffix=dtype.__name__)
148
+ with test.assertRaisesRegex(
149
+ RuntimeError,
150
+ r"All numeric arguments to vec\(\) constructor should have the same "
151
+ r"type, expected 2 arg_types of type <class 'warp.types.float32'>, "
152
+ r"received <class 'warp.types.float32'>,<class 'warp.types.int32'>$",
153
+ ):
154
+ wp.launch(
155
+ kernel,
156
+ dim=1,
157
+ inputs=[],
158
+ device=device,
159
+ )
1552
160
 
1553
- if register_kernels:
1554
- return
1555
161
 
1556
- s2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1557
- s3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1558
- s4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1559
- s5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1560
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1561
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1562
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1563
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1564
- v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1565
- v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1566
- v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1567
- v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1568
- v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1569
- v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1570
- v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1571
- v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1572
- v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1573
- v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1574
- v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1575
- v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1576
- v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1577
- v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1578
- tape = wp.Tape()
1579
- with tape:
162
+ def test_tpl_constructor_error_incompatible_sizes(test, device):
163
+ @wp.kernel
164
+ def kernel():
165
+ wp.vec3(wp.vec2(1.0, 2.0))
166
+
167
+ with test.assertRaisesRegex(RuntimeError, r"Incompatible matrix sizes for casting copy constructor, 3 vs 2"):
1580
168
  wp.launch(
1581
169
  kernel,
1582
170
  dim=1,
1583
- inputs=[
1584
- s2,
1585
- s3,
1586
- s4,
1587
- s5,
1588
- v2,
1589
- v3,
1590
- v4,
1591
- v5,
1592
- ],
1593
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
171
+ inputs=[],
1594
172
  device=device,
1595
173
  )
1596
174
 
1597
- if dtype in np_int_types:
1598
- assert_np_equal(v20.numpy()[0], 2 * (v2.numpy()[0, 0] // s2.numpy()[0, 0]), tol=tol)
1599
- assert_np_equal(v21.numpy()[0], 2 * (v2.numpy()[0, 1] // s2.numpy()[0, 1]), tol=tol)
1600
-
1601
- assert_np_equal(v30.numpy()[0], 2 * (v3.numpy()[0, 0] // s3.numpy()[0, 0]), tol=tol)
1602
- assert_np_equal(v31.numpy()[0], 2 * (v3.numpy()[0, 1] // s3.numpy()[0, 1]), tol=tol)
1603
- assert_np_equal(v32.numpy()[0], 2 * (v3.numpy()[0, 2] // s3.numpy()[0, 2]), tol=tol)
1604
-
1605
- assert_np_equal(v40.numpy()[0], 2 * (v4.numpy()[0, 0] // s4.numpy()[0, 0]), tol=tol)
1606
- assert_np_equal(v41.numpy()[0], 2 * (v4.numpy()[0, 1] // s4.numpy()[0, 1]), tol=tol)
1607
- assert_np_equal(v42.numpy()[0], 2 * (v4.numpy()[0, 2] // s4.numpy()[0, 2]), tol=tol)
1608
- assert_np_equal(v43.numpy()[0], 2 * (v4.numpy()[0, 3] // s4.numpy()[0, 3]), tol=tol)
1609
-
1610
- assert_np_equal(v50.numpy()[0], 2 * (v5.numpy()[0, 0] // s5.numpy()[0, 0]), tol=tol)
1611
- assert_np_equal(v51.numpy()[0], 2 * (v5.numpy()[0, 1] // s5.numpy()[0, 1]), tol=tol)
1612
- assert_np_equal(v52.numpy()[0], 2 * (v5.numpy()[0, 2] // s5.numpy()[0, 2]), tol=tol)
1613
- assert_np_equal(v53.numpy()[0], 2 * (v5.numpy()[0, 3] // s5.numpy()[0, 3]), tol=tol)
1614
- assert_np_equal(v54.numpy()[0], 2 * (v5.numpy()[0, 4] // s5.numpy()[0, 4]), tol=tol)
1615
- else:
1616
- assert_np_equal(v20.numpy()[0], 2 * v2.numpy()[0, 0] / s2.numpy()[0, 0], tol=tol)
1617
- assert_np_equal(v21.numpy()[0], 2 * v2.numpy()[0, 1] / s2.numpy()[0, 1], tol=tol)
1618
-
1619
- assert_np_equal(v30.numpy()[0], 2 * v3.numpy()[0, 0] / s3.numpy()[0, 0], tol=tol)
1620
- assert_np_equal(v31.numpy()[0], 2 * v3.numpy()[0, 1] / s3.numpy()[0, 1], tol=tol)
1621
- assert_np_equal(v32.numpy()[0], 2 * v3.numpy()[0, 2] / s3.numpy()[0, 2], tol=tol)
1622
-
1623
- assert_np_equal(v40.numpy()[0], 2 * v4.numpy()[0, 0] / s4.numpy()[0, 0], tol=tol)
1624
- assert_np_equal(v41.numpy()[0], 2 * v4.numpy()[0, 1] / s4.numpy()[0, 1], tol=tol)
1625
- assert_np_equal(v42.numpy()[0], 2 * v4.numpy()[0, 2] / s4.numpy()[0, 2], tol=tol)
1626
- assert_np_equal(v43.numpy()[0], 2 * v4.numpy()[0, 3] / s4.numpy()[0, 3], tol=tol)
1627
-
1628
- assert_np_equal(v50.numpy()[0], 2 * v5.numpy()[0, 0] / s5.numpy()[0, 0], tol=tol)
1629
- assert_np_equal(v51.numpy()[0], 2 * v5.numpy()[0, 1] / s5.numpy()[0, 1], tol=tol)
1630
- assert_np_equal(v52.numpy()[0], 2 * v5.numpy()[0, 2] / s5.numpy()[0, 2], tol=tol)
1631
- assert_np_equal(v53.numpy()[0], 2 * v5.numpy()[0, 3] / s5.numpy()[0, 3], tol=tol)
1632
- assert_np_equal(v54.numpy()[0], 2 * v5.numpy()[0, 4] / s5.numpy()[0, 4], tol=tol)
1633
175
 
1634
- if dtype in np_float_types:
1635
- incmps = np.concatenate([v.numpy()[0] for v in [v2, v3, v4, v5]])
1636
- scmps = np.concatenate([v.numpy()[0] for v in [s2, s3, s4, s5]])
176
+ def test_tpl_constructor_error_numeric_args_mismatch(test, device):
177
+ @wp.kernel
178
+ def kernel():
179
+ wp.vec2(1.0, 2)
1637
180
 
1638
- for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
1639
- tape.backward(loss=l)
1640
- sgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [s2, s3, s4, s5]])
1641
- expected_grads = np.zeros_like(sgrads)
181
+ with test.assertRaisesRegex(
182
+ RuntimeError,
183
+ r"All numeric arguments to vec\(\) constructor should have the same "
184
+ r"type, expected 2 arg_types of type <class 'warp.types.float32'>, "
185
+ r"received <class 'warp.types.float32'>,<class 'warp.types.int32'>$",
186
+ ):
187
+ wp.launch(
188
+ kernel,
189
+ dim=1,
190
+ inputs=[],
191
+ device=device,
192
+ )
1642
193
 
1643
- # d/ds v/s = -v/s^2
1644
- expected_grads[i] = -incmps[i] * 2 / (scmps[i] * scmps[i])
1645
- assert_np_equal(sgrads, expected_grads, tol=20 * tol)
1646
194
 
1647
- allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
1648
- expected_grads = np.zeros_like(allgrads)
195
+ def test_tpl_ops_with_anon(test, device):
196
+ vec3i = wp.vec(3, dtype=int)
1649
197
 
1650
- # d/dv v/s = 1/s
1651
- expected_grads[i] = 2 / scmps[i]
1652
- assert_np_equal(allgrads, expected_grads, tol=tol)
198
+ v = wp.vec3i(1, 2, 3)
199
+ v += vec3i(2, 3, 4)
200
+ v -= vec3i(3, 4, 5)
201
+ test.assertSequenceEqual(v, (0, 1, 2))
1653
202
 
1654
- tape.zero()
203
+ v = vec3i(1, 2, 3)
204
+ v += wp.vec3i(2, 3, 4)
205
+ v -= wp.vec3i(3, 4, 5)
206
+ test.assertSequenceEqual(v, (0, 1, 2))
1655
207
 
1656
208
 
1657
- def test_addition(test, device, dtype, register_kernels=False):
1658
- np.random.seed(123)
209
+ def test_negation(test, device, dtype, register_kernels=False):
210
+ rng = np.random.default_rng(123)
1659
211
 
1660
212
  tol = {
1661
213
  np.float16: 5.0e-3,
@@ -1669,15 +221,15 @@ def test_addition(test, device, dtype, register_kernels=False):
1669
221
  vec4 = wp.types.vector(length=4, dtype=wptype)
1670
222
  vec5 = wp.types.vector(length=5, dtype=wptype)
1671
223
 
1672
- def check_add(
1673
- s2: wp.array(dtype=vec2),
1674
- s3: wp.array(dtype=vec3),
1675
- s4: wp.array(dtype=vec4),
1676
- s5: wp.array(dtype=vec5),
224
+ def check_negation(
1677
225
  v2: wp.array(dtype=vec2),
1678
226
  v3: wp.array(dtype=vec3),
1679
227
  v4: wp.array(dtype=vec4),
1680
228
  v5: wp.array(dtype=vec5),
229
+ v2out: wp.array(dtype=vec2),
230
+ v3out: wp.array(dtype=vec3),
231
+ v4out: wp.array(dtype=vec4),
232
+ v5out: wp.array(dtype=vec5),
1681
233
  v20: wp.array(dtype=wptype),
1682
234
  v21: wp.array(dtype=wptype),
1683
235
  v30: wp.array(dtype=wptype),
@@ -1693,11 +245,17 @@ def test_addition(test, device, dtype, register_kernels=False):
1693
245
  v53: wp.array(dtype=wptype),
1694
246
  v54: wp.array(dtype=wptype),
1695
247
  ):
1696
- v2result = v2[0] + s2[0]
1697
- v3result = v3[0] + s3[0]
1698
- v4result = v4[0] + s4[0]
1699
- v5result = v5[0] + s5[0]
248
+ v2result = -v2[0]
249
+ v3result = -v3[0]
250
+ v4result = -v4[0]
251
+ v5result = -v5[0]
252
+
253
+ v2out[0] = v2result
254
+ v3out[0] = v3result
255
+ v4out[0] = v4result
256
+ v5out[0] = v5result
1700
257
 
258
+ # multiply these outputs by 2 so we've got something to backpropagate:
1701
259
  v20[0] = wptype(2) * v2result[0]
1702
260
  v21[0] = wptype(2) * v2result[1]
1703
261
 
@@ -1716,19 +274,21 @@ def test_addition(test, device, dtype, register_kernels=False):
1716
274
  v53[0] = wptype(2) * v5result[3]
1717
275
  v54[0] = wptype(2) * v5result[4]
1718
276
 
1719
- kernel = getkernel(check_add, suffix=dtype.__name__)
277
+ kernel = getkernel(check_negation, suffix=dtype.__name__)
1720
278
 
1721
279
  if register_kernels:
1722
280
  return
1723
281
 
1724
- s2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1725
- s3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1726
- s4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1727
- s5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1728
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1729
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1730
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1731
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
282
+ v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
283
+ v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
284
+ v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
285
+ v5_np = randvals(rng, (1, 5), dtype)
286
+ v5 = wp.array(v5_np, dtype=vec5, requires_grad=True, device=device)
287
+
288
+ v2out = wp.zeros(1, dtype=vec2, device=device)
289
+ v3out = wp.zeros(1, dtype=vec3, device=device)
290
+ v4out = wp.zeros(1, dtype=vec4, device=device)
291
+ v5out = wp.zeros(1, dtype=vec5, device=device)
1732
292
  v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1733
293
  v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1734
294
  v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
@@ -1743,67 +303,33 @@ def test_addition(test, device, dtype, register_kernels=False):
1743
303
  v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1744
304
  v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1745
305
  v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
306
+
1746
307
  tape = wp.Tape()
1747
308
  with tape:
1748
309
  wp.launch(
1749
310
  kernel,
1750
311
  dim=1,
1751
- inputs=[
1752
- s2,
1753
- s3,
1754
- s4,
1755
- s5,
1756
- v2,
1757
- v3,
1758
- v4,
1759
- v5,
1760
- ],
1761
- outputs=[v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
312
+ inputs=[v2, v3, v4, v5],
313
+ outputs=[v2out, v3out, v4out, v5out, v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54],
1762
314
  device=device,
1763
315
  )
1764
316
 
1765
- assert_np_equal(v20.numpy()[0], 2 * (v2.numpy()[0, 0] + s2.numpy()[0, 0]), tol=tol)
1766
- assert_np_equal(v21.numpy()[0], 2 * (v2.numpy()[0, 1] + s2.numpy()[0, 1]), tol=tol)
1767
-
1768
- assert_np_equal(v30.numpy()[0], 2 * (v3.numpy()[0, 0] + s3.numpy()[0, 0]), tol=tol)
1769
- assert_np_equal(v31.numpy()[0], 2 * (v3.numpy()[0, 1] + s3.numpy()[0, 1]), tol=tol)
1770
- assert_np_equal(v32.numpy()[0], 2 * (v3.numpy()[0, 2] + s3.numpy()[0, 2]), tol=tol)
1771
-
1772
- assert_np_equal(v40.numpy()[0], 2 * (v4.numpy()[0, 0] + s4.numpy()[0, 0]), tol=tol)
1773
- assert_np_equal(v41.numpy()[0], 2 * (v4.numpy()[0, 1] + s4.numpy()[0, 1]), tol=tol)
1774
- assert_np_equal(v42.numpy()[0], 2 * (v4.numpy()[0, 2] + s4.numpy()[0, 2]), tol=tol)
1775
- assert_np_equal(v43.numpy()[0], 2 * (v4.numpy()[0, 3] + s4.numpy()[0, 3]), tol=tol)
1776
-
1777
- assert_np_equal(v50.numpy()[0], 2 * (v5.numpy()[0, 0] + s5.numpy()[0, 0]), tol=tol)
1778
- assert_np_equal(v51.numpy()[0], 2 * (v5.numpy()[0, 1] + s5.numpy()[0, 1]), tol=tol)
1779
- assert_np_equal(v52.numpy()[0], 2 * (v5.numpy()[0, 2] + s5.numpy()[0, 2]), tol=tol)
1780
- assert_np_equal(v53.numpy()[0], 2 * (v5.numpy()[0, 3] + s5.numpy()[0, 3]), tol=tol)
1781
- assert_np_equal(v54.numpy()[0], 2 * (v5.numpy()[0, 4] + s5.numpy()[0, 4]), tol=2 * tol)
1782
-
1783
317
  if dtype in np_float_types:
1784
318
  for i, l in enumerate([v20, v21, v30, v31, v32, v40, v41, v42, v43, v50, v51, v52, v53, v54]):
1785
319
  tape.backward(loss=l)
1786
- sgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [s2, s3, s4, s5]])
1787
- expected_grads = np.zeros_like(sgrads)
1788
-
1789
- expected_grads[i] = 2
1790
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
1791
-
1792
320
  allgrads = np.concatenate([tape.gradients[v].numpy()[0] for v in [v2, v3, v4, v5]])
321
+ expected_grads = np.zeros_like(allgrads)
322
+ expected_grads[i] = -2
1793
323
  assert_np_equal(allgrads, expected_grads, tol=tol)
1794
-
1795
324
  tape.zero()
1796
325
 
326
+ assert_np_equal(v2out.numpy()[0], -v2.numpy()[0], tol=tol)
327
+ assert_np_equal(v3out.numpy()[0], -v3.numpy()[0], tol=tol)
328
+ assert_np_equal(v4out.numpy()[0], -v4.numpy()[0], tol=tol)
329
+ assert_np_equal(v5out.numpy()[0], -v5.numpy()[0], tol=tol)
1797
330
 
1798
- def test_subtraction_unsigned(test, device, dtype, register_kernels=False):
1799
- np.random.seed(123)
1800
-
1801
- tol = {
1802
- np.float16: 1.0e-3,
1803
- np.float32: 1.0e-6,
1804
- np.float64: 1.0e-8,
1805
- }.get(dtype, 0)
1806
331
 
332
+ def test_subtraction_unsigned(test, device, dtype, register_kernels=False):
1807
333
  wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1808
334
  vec2 = wp.types.vector(length=2, dtype=wptype)
1809
335
  vec3 = wp.types.vector(length=3, dtype=wptype)
@@ -1852,7 +378,7 @@ def test_subtraction_unsigned(test, device, dtype, register_kernels=False):
1852
378
 
1853
379
 
1854
380
  def test_subtraction(test, device, dtype, register_kernels=False):
1855
- np.random.seed(123)
381
+ rng = np.random.default_rng(123)
1856
382
 
1857
383
  tol = {
1858
384
  np.float16: 5.0e-3,
@@ -1919,14 +445,14 @@ def test_subtraction(test, device, dtype, register_kernels=False):
1919
445
  if register_kernels:
1920
446
  return
1921
447
 
1922
- s2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1923
- s3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1924
- s4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1925
- s5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1926
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
1927
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
1928
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
1929
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
448
+ s2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
449
+ s3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
450
+ s4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
451
+ s5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
452
+ v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
453
+ v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
454
+ v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
455
+ v5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
1930
456
  v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1931
457
  v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1932
458
  v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
@@ -1997,129 +523,8 @@ def test_subtraction(test, device, dtype, register_kernels=False):
1997
523
  tape.zero()
1998
524
 
1999
525
 
2000
- def test_dotproduct(test, device, dtype, register_kernels=False):
2001
- np.random.seed(123)
2002
-
2003
- tol = {
2004
- np.float16: 1.0e-2,
2005
- np.float32: 1.0e-6,
2006
- np.float64: 1.0e-8,
2007
- }.get(dtype, 0)
2008
-
2009
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
2010
- vec2 = wp.types.vector(length=2, dtype=wptype)
2011
- vec3 = wp.types.vector(length=3, dtype=wptype)
2012
- vec4 = wp.types.vector(length=4, dtype=wptype)
2013
- vec5 = wp.types.vector(length=5, dtype=wptype)
2014
-
2015
- def check_dot(
2016
- s2: wp.array(dtype=vec2),
2017
- s3: wp.array(dtype=vec3),
2018
- s4: wp.array(dtype=vec4),
2019
- s5: wp.array(dtype=vec5),
2020
- v2: wp.array(dtype=vec2),
2021
- v3: wp.array(dtype=vec3),
2022
- v4: wp.array(dtype=vec4),
2023
- v5: wp.array(dtype=vec5),
2024
- dot2: wp.array(dtype=wptype),
2025
- dot3: wp.array(dtype=wptype),
2026
- dot4: wp.array(dtype=wptype),
2027
- dot5: wp.array(dtype=wptype),
2028
- ):
2029
- dot2[0] = wptype(2) * wp.dot(v2[0], s2[0])
2030
- dot3[0] = wptype(2) * wp.dot(v3[0], s3[0])
2031
- dot4[0] = wptype(2) * wp.dot(v4[0], s4[0])
2032
- dot5[0] = wptype(2) * wp.dot(v5[0], s5[0])
2033
-
2034
- kernel = getkernel(check_dot, suffix=dtype.__name__)
2035
-
2036
- if register_kernels:
2037
- return
2038
-
2039
- s2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
2040
- s3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
2041
- s4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
2042
- s5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
2043
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
2044
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
2045
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
2046
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
2047
- dot2 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2048
- dot3 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2049
- dot4 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2050
- dot5 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2051
- tape = wp.Tape()
2052
- with tape:
2053
- wp.launch(
2054
- kernel,
2055
- dim=1,
2056
- inputs=[
2057
- s2,
2058
- s3,
2059
- s4,
2060
- s5,
2061
- v2,
2062
- v3,
2063
- v4,
2064
- v5,
2065
- ],
2066
- outputs=[dot2, dot3, dot4, dot5],
2067
- device=device,
2068
- )
2069
-
2070
- assert_np_equal(dot2.numpy()[0], 2.0 * (v2.numpy() * s2.numpy()).sum(), tol=10 * tol)
2071
- assert_np_equal(dot3.numpy()[0], 2.0 * (v3.numpy() * s3.numpy()).sum(), tol=10 * tol)
2072
- assert_np_equal(dot4.numpy()[0], 2.0 * (v4.numpy() * s4.numpy()).sum(), tol=10 * tol)
2073
- assert_np_equal(dot5.numpy()[0], 2.0 * (v5.numpy() * s5.numpy()).sum(), tol=10 * tol)
2074
-
2075
- if dtype in np_float_types:
2076
- tape.backward(loss=dot2)
2077
- sgrads = tape.gradients[s2].numpy()[0]
2078
- expected_grads = 2.0 * v2.numpy()[0]
2079
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
2080
-
2081
- vgrads = tape.gradients[v2].numpy()[0]
2082
- expected_grads = 2.0 * s2.numpy()[0]
2083
- assert_np_equal(vgrads, expected_grads, tol=tol)
2084
-
2085
- tape.zero()
2086
-
2087
- tape.backward(loss=dot3)
2088
- sgrads = tape.gradients[s3].numpy()[0]
2089
- expected_grads = 2.0 * v3.numpy()[0]
2090
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
2091
-
2092
- vgrads = tape.gradients[v3].numpy()[0]
2093
- expected_grads = 2.0 * s3.numpy()[0]
2094
- assert_np_equal(vgrads, expected_grads, tol=tol)
2095
-
2096
- tape.zero()
2097
-
2098
- tape.backward(loss=dot4)
2099
- sgrads = tape.gradients[s4].numpy()[0]
2100
- expected_grads = 2.0 * v4.numpy()[0]
2101
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
2102
-
2103
- vgrads = tape.gradients[v4].numpy()[0]
2104
- expected_grads = 2.0 * s4.numpy()[0]
2105
- assert_np_equal(vgrads, expected_grads, tol=tol)
2106
-
2107
- tape.zero()
2108
-
2109
- tape.backward(loss=dot5)
2110
- sgrads = tape.gradients[s5].numpy()[0]
2111
- expected_grads = 2.0 * v5.numpy()[0]
2112
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
2113
-
2114
- vgrads = tape.gradients[v5].numpy()[0]
2115
- expected_grads = 2.0 * s5.numpy()[0]
2116
- assert_np_equal(vgrads, expected_grads, tol=10 * tol)
2117
-
2118
- tape.zero()
2119
-
2120
-
2121
526
  def test_length(test, device, dtype, register_kernels=False):
2122
- np.random.seed(123)
527
+ rng = np.random.default_rng(123)
2123
528
 
2124
529
  tol = {
2125
530
  np.float16: 5.0e-3,
@@ -2162,10 +567,10 @@ def test_length(test, device, dtype, register_kernels=False):
2162
567
  if register_kernels:
2163
568
  return
2164
569
 
2165
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
2166
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
2167
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
2168
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
570
+ v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
571
+ v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
572
+ v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
573
+ v5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
2169
574
 
2170
575
  l2 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2171
576
  l3 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
@@ -2252,7 +657,7 @@ def test_length(test, device, dtype, register_kernels=False):
2252
657
 
2253
658
 
2254
659
  def test_normalize(test, device, dtype, register_kernels=False):
2255
- np.random.seed(123)
660
+ rng = np.random.default_rng(123)
2256
661
 
2257
662
  tol = {
2258
663
  np.float16: 5.0e-3,
@@ -2360,10 +765,10 @@ def test_normalize(test, device, dtype, register_kernels=False):
2360
765
 
2361
766
  # I've already tested the things I'm using in check_normalize_alt, so I'll just
2362
767
  # make sure the two are giving the same results/gradients
2363
- v2 = wp.array(randvals((1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
2364
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
2365
- v4 = wp.array(randvals((1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
2366
- v5 = wp.array(randvals((1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
768
+ v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
769
+ v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
770
+ v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
771
+ v5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
2367
772
 
2368
773
  n20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2369
774
  n21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
@@ -2485,7 +890,7 @@ def test_normalize(test, device, dtype, register_kernels=False):
2485
890
 
2486
891
 
2487
892
  def test_crossproduct(test, device, dtype, register_kernels=False):
2488
- np.random.seed(123)
893
+ rng = np.random.default_rng(123)
2489
894
 
2490
895
  tol = {
2491
896
  np.float16: 5.0e-3,
@@ -2515,8 +920,8 @@ def test_crossproduct(test, device, dtype, register_kernels=False):
2515
920
  if register_kernels:
2516
921
  return
2517
922
 
2518
- s3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
2519
- v3 = wp.array(randvals((1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
923
+ s3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
924
+ v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
2520
925
  c0 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2521
926
  c1 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2522
927
  c2 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
@@ -2579,216 +984,115 @@ def test_crossproduct(test, device, dtype, register_kernels=False):
2579
984
  tape.zero()
2580
985
 
2581
986
 
2582
- def test_minmax(test, device, dtype, register_kernels=False):
2583
- np.random.seed(123)
987
+ def test_casting_constructors(test, device, dtype, register_kernels=False):
988
+ np_type = np.dtype(dtype)
989
+ wp_type = wp.types.np_dtype_to_warp_type[np_type]
990
+ vec3 = wp.types.vector(length=3, dtype=wp_type)
2584
991
 
2585
- # \TODO: not quite sure why, but the numbers are off for 16 bit float
2586
- # on the cpu (but not cuda). This is probably just the sketchy float16
2587
- # arithmetic I implemented to get all this stuff working, so
2588
- # hopefully that can be fixed when we do that correctly.
2589
- tol = {
2590
- np.float16: 1.0e-2,
2591
- }.get(dtype, 0)
992
+ np16 = np.dtype(np.float16)
993
+ wp16 = wp.types.np_dtype_to_warp_type[np16]
2592
994
 
2593
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
2594
- vec2 = wp.types.vector(length=2, dtype=wptype)
2595
- vec3 = wp.types.vector(length=3, dtype=wptype)
2596
- vec4 = wp.types.vector(length=4, dtype=wptype)
2597
- vec5 = wp.types.vector(length=5, dtype=wptype)
995
+ np32 = np.dtype(np.float32)
996
+ wp32 = wp.types.np_dtype_to_warp_type[np32]
2598
997
 
2599
- # \TODO: Also not quite sure why: this kernel compiles incredibly
2600
- # slowly though...
2601
- def check_vec_min_max(
2602
- a: wp.array(dtype=wptype, ndim=2),
2603
- b: wp.array(dtype=wptype, ndim=2),
2604
- mins: wp.array(dtype=wptype, ndim=2),
2605
- maxs: wp.array(dtype=wptype, ndim=2),
2606
- ):
2607
- for i in range(10):
2608
- # multiplying by 2 so we've got something to backpropagate:
2609
- a2read = vec2(a[i, 0], a[i, 1])
2610
- b2read = vec2(b[i, 0], b[i, 1])
2611
- c2 = wptype(2) * wp.min(a2read, b2read)
2612
- d2 = wptype(2) * wp.max(a2read, b2read)
2613
-
2614
- a3read = vec3(a[i, 2], a[i, 3], a[i, 4])
2615
- b3read = vec3(b[i, 2], b[i, 3], b[i, 4])
2616
- c3 = wptype(2) * wp.min(a3read, b3read)
2617
- d3 = wptype(2) * wp.max(a3read, b3read)
2618
-
2619
- a4read = vec4(a[i, 5], a[i, 6], a[i, 7], a[i, 8])
2620
- b4read = vec4(b[i, 5], b[i, 6], b[i, 7], b[i, 8])
2621
- c4 = wptype(2) * wp.min(a4read, b4read)
2622
- d4 = wptype(2) * wp.max(a4read, b4read)
2623
-
2624
- a5read = vec5(a[i, 9], a[i, 10], a[i, 11], a[i, 12], a[i, 13])
2625
- b5read = vec5(b[i, 9], b[i, 10], b[i, 11], b[i, 12], b[i, 13])
2626
- c5 = wptype(2) * wp.min(a5read, b5read)
2627
- d5 = wptype(2) * wp.max(a5read, b5read)
2628
-
2629
- mins[i, 0] = c2[0]
2630
- mins[i, 1] = c2[1]
2631
-
2632
- mins[i, 2] = c3[0]
2633
- mins[i, 3] = c3[1]
2634
- mins[i, 4] = c3[2]
2635
-
2636
- mins[i, 5] = c4[0]
2637
- mins[i, 6] = c4[1]
2638
- mins[i, 7] = c4[2]
2639
- mins[i, 8] = c4[3]
2640
-
2641
- mins[i, 9] = c5[0]
2642
- mins[i, 10] = c5[1]
2643
- mins[i, 11] = c5[2]
2644
- mins[i, 12] = c5[3]
2645
- mins[i, 13] = c5[4]
2646
-
2647
- maxs[i, 0] = d2[0]
2648
- maxs[i, 1] = d2[1]
2649
-
2650
- maxs[i, 2] = d3[0]
2651
- maxs[i, 3] = d3[1]
2652
- maxs[i, 4] = d3[2]
2653
-
2654
- maxs[i, 5] = d4[0]
2655
- maxs[i, 6] = d4[1]
2656
- maxs[i, 7] = d4[2]
2657
- maxs[i, 8] = d4[3]
2658
-
2659
- maxs[i, 9] = d5[0]
2660
- maxs[i, 10] = d5[1]
2661
- maxs[i, 11] = d5[2]
2662
- maxs[i, 12] = d5[3]
2663
- maxs[i, 13] = d5[4]
2664
-
2665
- kernel = getkernel(check_vec_min_max, suffix=dtype.__name__)
2666
- output_select_kernel = get_select_kernel2(wptype)
998
+ np64 = np.dtype(np.float64)
999
+ wp64 = wp.types.np_dtype_to_warp_type[np64]
2667
1000
 
2668
- if register_kernels:
2669
- return
1001
+ def cast_float16(a: wp.array(dtype=wp_type, ndim=2), b: wp.array(dtype=wp16, ndim=2)):
1002
+ tid = wp.tid()
2670
1003
 
2671
- a = wp.array(randvals((10, 14), dtype), dtype=wptype, requires_grad=True, device=device)
2672
- b = wp.array(randvals((10, 14), dtype), dtype=wptype, requires_grad=True, device=device)
1004
+ v1 = vec3(a[tid, 0], a[tid, 1], a[tid, 2])
1005
+ v2 = wp.vector(v1, dtype=wp16)
2673
1006
 
2674
- mins = wp.zeros((10, 14), dtype=wptype, requires_grad=True, device=device)
2675
- maxs = wp.zeros((10, 14), dtype=wptype, requires_grad=True, device=device)
1007
+ b[tid, 0] = v2[0]
1008
+ b[tid, 1] = v2[1]
1009
+ b[tid, 2] = v2[2]
2676
1010
 
2677
- tape = wp.Tape()
2678
- with tape:
2679
- wp.launch(kernel, dim=1, inputs=[a, b], outputs=[mins, maxs], device=device)
1011
+ def cast_float32(a: wp.array(dtype=wp_type, ndim=2), b: wp.array(dtype=wp32, ndim=2)):
1012
+ tid = wp.tid()
2680
1013
 
2681
- assert_np_equal(mins.numpy(), 2 * np.minimum(a.numpy(), b.numpy()), tol=tol)
2682
- assert_np_equal(maxs.numpy(), 2 * np.maximum(a.numpy(), b.numpy()), tol=tol)
1014
+ v1 = vec3(a[tid, 0], a[tid, 1], a[tid, 2])
1015
+ v2 = wp.vector(v1, dtype=wp32)
2683
1016
 
2684
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
2685
- if dtype in np_float_types:
2686
- for i in range(10):
2687
- for j in range(14):
2688
- tape = wp.Tape()
2689
- with tape:
2690
- wp.launch(kernel, dim=1, inputs=[a, b], outputs=[mins, maxs], device=device)
2691
- wp.launch(output_select_kernel, dim=1, inputs=[mins, i, j], outputs=[out], device=device)
2692
-
2693
- tape.backward(loss=out)
2694
- expected = np.zeros_like(a.numpy())
2695
- expected[i, j] = 2 if (a.numpy()[i, j] < b.numpy()[i, j]) else 0
2696
- assert_np_equal(tape.gradients[a].numpy(), expected, tol=tol)
2697
- expected[i, j] = 2 if (b.numpy()[i, j] < a.numpy()[i, j]) else 0
2698
- assert_np_equal(tape.gradients[b].numpy(), expected, tol=tol)
2699
- tape.zero()
2700
-
2701
- tape = wp.Tape()
2702
- with tape:
2703
- wp.launch(kernel, dim=1, inputs=[a, b], outputs=[mins, maxs], device=device)
2704
- wp.launch(output_select_kernel, dim=1, inputs=[maxs, i, j], outputs=[out], device=device)
2705
-
2706
- tape.backward(loss=out)
2707
- expected = np.zeros_like(a.numpy())
2708
- expected[i, j] = 2 if (a.numpy()[i, j] > b.numpy()[i, j]) else 0
2709
- assert_np_equal(tape.gradients[a].numpy(), expected, tol=tol)
2710
- expected[i, j] = 2 if (b.numpy()[i, j] > a.numpy()[i, j]) else 0
2711
- assert_np_equal(tape.gradients[b].numpy(), expected, tol=tol)
2712
- tape.zero()
2713
-
2714
-
2715
- def test_equivalent_types(test, device, dtype, register_kernels=False):
2716
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1017
+ b[tid, 0] = v2[0]
1018
+ b[tid, 1] = v2[1]
1019
+ b[tid, 2] = v2[2]
2717
1020
 
2718
- # vector types
2719
- vec2 = wp.types.vector(length=2, dtype=wptype)
2720
- vec3 = wp.types.vector(length=3, dtype=wptype)
2721
- vec4 = wp.types.vector(length=4, dtype=wptype)
2722
- vec5 = wp.types.vector(length=5, dtype=wptype)
1021
+ def cast_float64(a: wp.array(dtype=wp_type, ndim=2), b: wp.array(dtype=wp64, ndim=2)):
1022
+ tid = wp.tid()
2723
1023
 
2724
- # vector types equivalent to the above
2725
- vec2_equiv = wp.types.vector(length=2, dtype=wptype)
2726
- vec3_equiv = wp.types.vector(length=3, dtype=wptype)
2727
- vec4_equiv = wp.types.vector(length=4, dtype=wptype)
2728
- vec5_equiv = wp.types.vector(length=5, dtype=wptype)
2729
-
2730
- # declare kernel with original types
2731
- def check_equivalence(
2732
- v2: vec2,
2733
- v3: vec3,
2734
- v4: vec4,
2735
- v5: vec5,
2736
- ):
2737
- wp.expect_eq(v2, vec2(wptype(1), wptype(2)))
2738
- wp.expect_eq(v3, vec3(wptype(1), wptype(2), wptype(3)))
2739
- wp.expect_eq(v4, vec4(wptype(1), wptype(2), wptype(3), wptype(4)))
2740
- wp.expect_eq(v5, vec5(wptype(1), wptype(2), wptype(3), wptype(4), wptype(5)))
1024
+ v1 = vec3(a[tid, 0], a[tid, 1], a[tid, 2])
1025
+ v2 = wp.vector(v1, dtype=wp64)
2741
1026
 
2742
- wp.expect_eq(v2, vec2_equiv(wptype(1), wptype(2)))
2743
- wp.expect_eq(v3, vec3_equiv(wptype(1), wptype(2), wptype(3)))
2744
- wp.expect_eq(v4, vec4_equiv(wptype(1), wptype(2), wptype(3), wptype(4)))
2745
- wp.expect_eq(v5, vec5_equiv(wptype(1), wptype(2), wptype(3), wptype(4), wptype(5)))
1027
+ b[tid, 0] = v2[0]
1028
+ b[tid, 1] = v2[1]
1029
+ b[tid, 2] = v2[2]
2746
1030
 
2747
- kernel = getkernel(check_equivalence, suffix=dtype.__name__)
1031
+ kernel_16 = getkernel(cast_float16, suffix=dtype.__name__)
1032
+ kernel_32 = getkernel(cast_float32, suffix=dtype.__name__)
1033
+ kernel_64 = getkernel(cast_float64, suffix=dtype.__name__)
2748
1034
 
2749
1035
  if register_kernels:
2750
1036
  return
2751
1037
 
2752
- # call kernel with equivalent types
2753
- v2 = vec2_equiv(1, 2)
2754
- v3 = vec3_equiv(1, 2, 3)
2755
- v4 = vec4_equiv(1, 2, 3, 4)
2756
- v5 = vec5_equiv(1, 2, 3, 4, 5)
1038
+ # check casting to float 16
1039
+ a = wp.array(np.ones((1, 3), dtype=np_type), dtype=wp_type, requires_grad=True, device=device)
1040
+ b = wp.array(np.zeros((1, 3), dtype=np16), dtype=wp16, requires_grad=True, device=device)
1041
+ b_result = np.ones((1, 3), dtype=np16)
1042
+ b_grad = wp.array(np.ones((1, 3), dtype=np16), dtype=wp16, device=device)
1043
+ a_grad = wp.array(np.ones((1, 3), dtype=np_type), dtype=wp_type, device=device)
2757
1044
 
2758
- wp.launch(kernel, dim=1, inputs=[v2, v3, v4, v5], device=device)
1045
+ tape = wp.Tape()
1046
+ with tape:
1047
+ wp.launch(kernel=kernel_16, dim=1, inputs=[a, b], device=device)
2759
1048
 
1049
+ tape.backward(grads={b: b_grad})
1050
+ out = tape.gradients[a].numpy()
2760
1051
 
2761
- def test_conversions(test, device, dtype, register_kernels=False):
2762
- def check_vectors_equal(
2763
- v0: wp.vec3,
2764
- v1: wp.vec3,
2765
- v2: wp.vec3,
2766
- v3: wp.vec3,
2767
- ):
2768
- wp.expect_eq(v1, v0)
2769
- wp.expect_eq(v2, v0)
2770
- wp.expect_eq(v3, v0)
1052
+ assert_np_equal(b.numpy(), b_result)
1053
+ assert_np_equal(out, a_grad.numpy())
2771
1054
 
2772
- kernel = getkernel(check_vectors_equal, suffix=dtype.__name__)
1055
+ # check casting to float 32
1056
+ a = wp.array(np.ones((1, 3), dtype=np_type), dtype=wp_type, requires_grad=True, device=device)
1057
+ b = wp.array(np.zeros((1, 3), dtype=np32), dtype=wp32, requires_grad=True, device=device)
1058
+ b_result = np.ones((1, 3), dtype=np32)
1059
+ b_grad = wp.array(np.ones((1, 3), dtype=np32), dtype=wp32, device=device)
1060
+ a_grad = wp.array(np.ones((1, 3), dtype=np_type), dtype=wp_type, device=device)
2773
1061
 
2774
- if register_kernels:
2775
- return
1062
+ tape = wp.Tape()
1063
+ with tape:
1064
+ wp.launch(kernel=kernel_32, dim=1, inputs=[a, b], device=device)
2776
1065
 
2777
- v0 = wp.vec3(1, 2, 3)
1066
+ tape.backward(grads={b: b_grad})
1067
+ out = tape.gradients[a].numpy()
2778
1068
 
2779
- # test explicit conversions - constructing vectors from different containers
2780
- v1 = wp.vec3((1, 2, 3))
2781
- v2 = wp.vec3([1, 2, 3])
2782
- v3 = wp.vec3(np.array([1, 2, 3], dtype=dtype))
1069
+ assert_np_equal(b.numpy(), b_result)
1070
+ assert_np_equal(out, a_grad.numpy())
2783
1071
 
2784
- wp.launch(kernel, dim=1, inputs=[v0, v1, v2, v3], device=device)
1072
+ # check casting to float 64
1073
+ a = wp.array(np.ones((1, 3), dtype=np_type), dtype=wp_type, requires_grad=True, device=device)
1074
+ b = wp.array(np.zeros((1, 3), dtype=np64), dtype=wp64, requires_grad=True, device=device)
1075
+ b_result = np.ones((1, 3), dtype=np64)
1076
+ b_grad = wp.array(np.ones((1, 3), dtype=np64), dtype=wp64, device=device)
1077
+ a_grad = wp.array(np.ones((1, 3), dtype=np_type), dtype=wp_type, device=device)
2785
1078
 
2786
- # test implicit conversions - passing different containers as vectors to wp.launch()
2787
- v1 = (1, 2, 3)
2788
- v2 = [1, 2, 3]
2789
- v3 = np.array([1, 2, 3], dtype=dtype)
1079
+ tape = wp.Tape()
1080
+ with tape:
1081
+ wp.launch(kernel=kernel_64, dim=1, inputs=[a, b], device=device)
1082
+
1083
+ tape.backward(grads={b: b_grad})
1084
+ out = tape.gradients[a].numpy()
1085
+
1086
+ assert_np_equal(b.numpy(), b_result)
1087
+ assert_np_equal(out, a_grad.numpy())
2790
1088
 
2791
- wp.launch(kernel, dim=1, inputs=[v0, v1, v2, v3], device=device)
1089
+
1090
+ @wp.kernel
1091
+ def test_vector_constructor_value_func():
1092
+ a = wp.vec2()
1093
+ b = wp.vector(a, dtype=wp.float16)
1094
+ c = wp.vector(a)
1095
+ d = wp.vector(a, length=2)
2792
1096
 
2793
1097
 
2794
1098
  # Test matrix constructors using explicit type (float16)
@@ -2852,113 +1156,113 @@ def test_constructors_constant_length():
2852
1156
  v[i] = float(i)
2853
1157
 
2854
1158
 
2855
- def register(parent):
2856
- devices = get_test_devices()
1159
+ devices = get_test_devices()
2857
1160
 
2858
- class TestVec(parent):
2859
- pass
2860
1161
 
2861
- add_kernel_test(TestVec, test_constructors_explicit_precision, dim=1, devices=devices)
2862
- add_kernel_test(TestVec, test_constructors_default_precision, dim=1, devices=devices)
2863
- add_kernel_test(TestVec, test_constructors_constant_length, dim=1, devices=devices)
1162
+ class TestVec(unittest.TestCase):
1163
+ pass
2864
1164
 
2865
- vec10 = wp.types.vector(length=10, dtype=float)
2866
- add_kernel_test(
2867
- TestVec,
2868
- test_vector_mutation,
2869
- dim=1,
2870
- inputs=[vec10(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0)],
2871
- devices=devices,
2872
- )
2873
1165
 
2874
- for dtype in np_unsigned_int_types:
2875
- add_function_test_register_kernel(
2876
- TestVec,
2877
- f"test_subtraction_unsigned_{dtype.__name__}",
2878
- test_subtraction_unsigned,
2879
- devices=devices,
2880
- dtype=dtype,
2881
- )
1166
+ add_kernel_test(TestVec, test_vector_constructor_value_func, dim=1, devices=devices)
1167
+ add_kernel_test(TestVec, test_constructors_explicit_precision, dim=1, devices=devices)
1168
+ add_kernel_test(TestVec, test_constructors_default_precision, dim=1, devices=devices)
1169
+ add_kernel_test(TestVec, test_constructors_constant_length, dim=1, devices=devices)
2882
1170
 
2883
- for dtype in np_signed_int_types + np_float_types:
2884
- add_function_test_register_kernel(
2885
- TestVec, f"test_negation_{dtype.__name__}", test_negation, devices=devices, dtype=dtype
2886
- )
2887
- add_function_test_register_kernel(
2888
- TestVec, f"test_subtraction_{dtype.__name__}", test_subtraction, devices=devices, dtype=dtype
2889
- )
1171
+ vec10 = wp.types.vector(length=10, dtype=float)
1172
+ add_kernel_test(
1173
+ TestVec,
1174
+ test_vector_mutation,
1175
+ dim=1,
1176
+ inputs=[vec10(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0)],
1177
+ devices=devices,
1178
+ )
2890
1179
 
2891
- for dtype in np_float_types:
2892
- add_function_test_register_kernel(
2893
- TestVec, f"test_crossproduct_{dtype.__name__}", test_crossproduct, devices=devices, dtype=dtype
2894
- )
2895
- add_function_test_register_kernel(
2896
- TestVec, f"test_length_{dtype.__name__}", test_length, devices=devices, dtype=dtype
2897
- )
2898
- add_function_test_register_kernel(
2899
- TestVec, f"test_normalize_{dtype.__name__}", test_normalize, devices=devices, dtype=dtype
2900
- )
1180
+ for dtype in np_unsigned_int_types:
1181
+ add_function_test_register_kernel(
1182
+ TestVec,
1183
+ f"test_subtraction_unsigned_{dtype.__name__}",
1184
+ test_subtraction_unsigned,
1185
+ devices=devices,
1186
+ dtype=dtype,
1187
+ )
2901
1188
 
2902
- for dtype in np_scalar_types:
2903
- add_function_test(TestVec, f"test_arrays_{dtype.__name__}", test_arrays, devices=devices, dtype=dtype)
2904
- add_function_test(TestVec, f"test_components_{dtype.__name__}", test_components, devices=None, dtype=dtype)
2905
- add_function_test_register_kernel(
2906
- TestVec, f"test_constructors_{dtype.__name__}", test_constructors, devices=devices, dtype=dtype
2907
- )
2908
- add_function_test_register_kernel(
2909
- TestVec, f"test_anon_type_instance_{dtype.__name__}", test_anon_type_instance, devices=devices, dtype=dtype
2910
- )
2911
- add_function_test_register_kernel(
2912
- TestVec, f"test_indexing_{dtype.__name__}", test_indexing, devices=devices, dtype=dtype
2913
- )
2914
- add_function_test_register_kernel(
2915
- TestVec, f"test_equality_{dtype.__name__}", test_equality, devices=devices, dtype=dtype
2916
- )
2917
- add_function_test_register_kernel(
2918
- TestVec,
2919
- f"test_scalar_multiplication_{dtype.__name__}",
2920
- test_scalar_multiplication,
2921
- devices=devices,
2922
- dtype=dtype,
2923
- )
2924
- add_function_test_register_kernel(
2925
- TestVec,
2926
- f"test_scalar_multiplication_rightmul_{dtype.__name__}",
2927
- test_scalar_multiplication_rightmul,
2928
- devices=devices,
2929
- dtype=dtype,
2930
- )
2931
- add_function_test_register_kernel(
2932
- TestVec, f"test_cw_multiplication_{dtype.__name__}", test_cw_multiplication, devices=devices, dtype=dtype
2933
- )
2934
- add_function_test_register_kernel(
2935
- TestVec, f"test_scalar_division_{dtype.__name__}", test_scalar_division, devices=devices, dtype=dtype
2936
- )
2937
- add_function_test_register_kernel(
2938
- TestVec, f"test_cw_division_{dtype.__name__}", test_cw_division, devices=devices, dtype=dtype
2939
- )
2940
- add_function_test_register_kernel(
2941
- TestVec, f"test_addition_{dtype.__name__}", test_addition, devices=devices, dtype=dtype
2942
- )
2943
- add_function_test_register_kernel(
2944
- TestVec, f"test_dotproduct_{dtype.__name__}", test_dotproduct, devices=devices, dtype=dtype
2945
- )
2946
- add_function_test_register_kernel(
2947
- TestVec, f"test_equivalent_types_{dtype.__name__}", test_equivalent_types, devices=devices, dtype=dtype
2948
- )
2949
- add_function_test_register_kernel(
2950
- TestVec, f"test_conversions_{dtype.__name__}", test_conversions, devices=devices, dtype=dtype
2951
- )
2952
- add_function_test_register_kernel(
2953
- TestVec, f"test_constants_{dtype.__name__}", test_constants, devices=devices, dtype=dtype
2954
- )
1189
+ for dtype in np_signed_int_types + np_float_types:
1190
+ add_function_test_register_kernel(
1191
+ TestVec, f"test_negation_{dtype.__name__}", test_negation, devices=devices, dtype=dtype
1192
+ )
1193
+ add_function_test_register_kernel(
1194
+ TestVec, f"test_subtraction_{dtype.__name__}", test_subtraction, devices=devices, dtype=dtype
1195
+ )
2955
1196
 
2956
- # the kernels in this test compile incredibly slowly...
2957
- # add_function_test_register_kernel(TestVec, f"test_minmax_{dtype.__name__}", test_minmax, devices=devices, dtype=dtype)
1197
+ for dtype in np_float_types:
1198
+ add_function_test_register_kernel(
1199
+ TestVec, f"test_crossproduct_{dtype.__name__}", test_crossproduct, devices=devices, dtype=dtype
1200
+ )
1201
+ add_function_test_register_kernel(
1202
+ TestVec, f"test_length_{dtype.__name__}", test_length, devices=devices, dtype=dtype
1203
+ )
1204
+ add_function_test_register_kernel(
1205
+ TestVec, f"test_normalize_{dtype.__name__}", test_normalize, devices=devices, dtype=dtype
1206
+ )
1207
+ add_function_test_register_kernel(
1208
+ TestVec,
1209
+ f"test_casting_constructors_{dtype.__name__}",
1210
+ test_casting_constructors,
1211
+ devices=devices,
1212
+ dtype=dtype,
1213
+ )
2958
1214
 
2959
- return TestVec
1215
+ add_function_test(
1216
+ TestVec,
1217
+ "test_anon_constructor_error_dtype_keyword_missing",
1218
+ test_anon_constructor_error_dtype_keyword_missing,
1219
+ devices=devices,
1220
+ )
1221
+ add_function_test(
1222
+ TestVec,
1223
+ "test_anon_constructor_error_length_mismatch",
1224
+ test_anon_constructor_error_length_mismatch,
1225
+ devices=devices,
1226
+ )
1227
+ add_function_test(
1228
+ TestVec,
1229
+ "test_anon_constructor_error_numeric_arg_missing_1",
1230
+ test_anon_constructor_error_numeric_arg_missing_1,
1231
+ devices=devices,
1232
+ )
1233
+ add_function_test(
1234
+ TestVec,
1235
+ "test_anon_constructor_error_numeric_arg_missing_2",
1236
+ test_anon_constructor_error_numeric_arg_missing_2,
1237
+ devices=devices,
1238
+ )
1239
+ add_function_test(
1240
+ TestVec,
1241
+ "test_anon_constructor_error_dtype_keyword_extraneous",
1242
+ test_anon_constructor_error_dtype_keyword_extraneous,
1243
+ devices=devices,
1244
+ )
1245
+ add_function_test(
1246
+ TestVec,
1247
+ "test_anon_constructor_error_numeric_args_mismatch",
1248
+ test_anon_constructor_error_numeric_args_mismatch,
1249
+ devices=devices,
1250
+ )
1251
+ add_function_test(
1252
+ TestVec,
1253
+ "test_tpl_constructor_error_incompatible_sizes",
1254
+ test_tpl_constructor_error_incompatible_sizes,
1255
+ devices=devices,
1256
+ )
1257
+ add_function_test(
1258
+ TestVec,
1259
+ "test_tpl_constructor_error_numeric_args_mismatch",
1260
+ test_tpl_constructor_error_numeric_args_mismatch,
1261
+ devices=devices,
1262
+ )
1263
+ add_function_test(TestVec, "test_tpl_ops_with_anon", test_tpl_ops_with_anon)
2960
1264
 
2961
1265
 
2962
1266
  if __name__ == "__main__":
2963
- c = register(unittest.TestCase)
1267
+ wp.build.clear_kernel_cache()
2964
1268
  unittest.main(verbosity=2, failfast=True)