warp-lang 1.7.0__py3-none-win_amd64.whl → 1.7.1__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/autograd.py +12 -2
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +1 -1
- warp/builtins.py +11 -10
- warp/codegen.py +17 -5
- warp/config.py +1 -1
- warp/context.py +6 -0
- warp/examples/benchmarks/benchmark_cloth.py +1 -1
- warp/examples/distributed/example_jacobi_mpi.py +507 -0
- warp/fem/field/field.py +11 -1
- warp/fem/field/nodal_field.py +36 -22
- warp/fem/geometry/adaptive_nanogrid.py +7 -3
- warp/fem/geometry/trimesh.py +4 -12
- warp/jax_experimental/custom_call.py +14 -2
- warp/jax_experimental/ffi.py +5 -1
- warp/native/tile.h +11 -11
- warp/native/warp.cu +1 -1
- warp/render/render_opengl.py +19 -17
- warp/render/render_usd.py +93 -3
- warp/sim/collide.py +11 -9
- warp/sim/inertia.py +189 -156
- warp/sim/integrator_euler.py +3 -0
- warp/sim/integrator_xpbd.py +3 -0
- warp/sim/model.py +29 -12
- warp/sim/render.py +4 -0
- warp/stubs.py +1 -1
- warp/tests/assets/torus.usda +1 -1
- warp/tests/sim/test_collision.py +237 -206
- warp/tests/sim/test_inertia.py +161 -0
- warp/tests/sim/{flaky_test_sim_grad.py → test_sim_grad.py} +4 -0
- warp/tests/sim/test_xpbd.py +399 -0
- warp/tests/test_codegen.py +24 -3
- warp/tests/test_examples.py +16 -6
- warp/tests/test_fem.py +75 -10
- warp/tests/test_mat.py +370 -103
- warp/tests/test_quat.py +321 -137
- warp/tests/test_vec.py +320 -174
- warp/tests/tile/test_tile_load.py +97 -0
- warp/tests/unittest_suites.py +2 -5
- warp/types.py +65 -8
- {warp_lang-1.7.0.dist-info → warp_lang-1.7.1.dist-info}/METADATA +21 -9
- {warp_lang-1.7.0.dist-info → warp_lang-1.7.1.dist-info}/RECORD +46 -43
- {warp_lang-1.7.0.dist-info → warp_lang-1.7.1.dist-info}/WHEEL +1 -1
- {warp_lang-1.7.0.dist-info → warp_lang-1.7.1.dist-info}/licenses/LICENSE.md +0 -26
- {warp_lang-1.7.0.dist-info → warp_lang-1.7.1.dist-info}/top_level.txt +0 -0
warp/tests/test_vec.py
CHANGED
|
@@ -58,6 +58,22 @@ def getkernel(func, suffix=""):
|
|
|
58
58
|
return kernel_cache[key]
|
|
59
59
|
|
|
60
60
|
|
|
61
|
+
def test_length_mismatch(test, device):
|
|
62
|
+
test.assertNotEqual(wp.vec3f(0.0, 0.0, 0.0), wp.vec2f(0.0, 0.0))
|
|
63
|
+
test.assertNotEqual(wp.vec2f(0.0, 0.0), wp.vec3f(0.0, 0.0, 0.0))
|
|
64
|
+
|
|
65
|
+
@wp.kernel
|
|
66
|
+
def kernel():
|
|
67
|
+
wp.expect_neq(wp.vec3f(0.0, 0.0, 0.0), wp.vec2f(0.0, 0.0))
|
|
68
|
+
wp.expect_neq(wp.vec2f(0.0, 0.0), wp.vec3f(0.0, 0.0, 0.0))
|
|
69
|
+
|
|
70
|
+
with test.assertRaisesRegex(
|
|
71
|
+
RuntimeError,
|
|
72
|
+
r"Can't test equality for objects with different types$",
|
|
73
|
+
):
|
|
74
|
+
wp.launch(kernel, dim=1, inputs=[], device=device)
|
|
75
|
+
|
|
76
|
+
|
|
61
77
|
def test_anon_constructor_error_length_mismatch(test, device):
|
|
62
78
|
@wp.kernel
|
|
63
79
|
def kernel():
|
|
@@ -1044,122 +1060,13 @@ def test_casting_constructors(test, device, dtype, register_kernels=False):
|
|
|
1044
1060
|
assert_np_equal(out, a_grad.numpy())
|
|
1045
1061
|
|
|
1046
1062
|
|
|
1047
|
-
def test_vector_assign_inplace(test, device, dtype, register_kernels=False):
|
|
1048
|
-
np_type = np.dtype(dtype)
|
|
1049
|
-
wp_type = wp.types.np_dtype_to_warp_type[np_type]
|
|
1050
|
-
|
|
1051
|
-
vec2 = wp.types.vector(length=2, dtype=wp_type)
|
|
1052
|
-
vec3 = wp.types.vector(length=3, dtype=wp_type)
|
|
1053
|
-
vec4 = wp.types.vector(length=4, dtype=wp_type)
|
|
1054
|
-
|
|
1055
|
-
def vectest_read_write_store(
|
|
1056
|
-
x: wp.array(dtype=wp_type), a: wp.array(dtype=vec2), b: wp.array(dtype=vec3), c: wp.array(dtype=vec4)
|
|
1057
|
-
):
|
|
1058
|
-
tid = wp.tid()
|
|
1059
|
-
|
|
1060
|
-
t = a[tid]
|
|
1061
|
-
t[0] = x[tid]
|
|
1062
|
-
a[tid] = t
|
|
1063
|
-
|
|
1064
|
-
u = b[tid]
|
|
1065
|
-
u[1] = x[tid]
|
|
1066
|
-
b[tid] = u
|
|
1067
|
-
|
|
1068
|
-
v = c[tid]
|
|
1069
|
-
v[2] = x[tid]
|
|
1070
|
-
c[tid] = v
|
|
1071
|
-
|
|
1072
|
-
def vectest_in_register(
|
|
1073
|
-
x: wp.array(dtype=wp_type), y: wp.array(dtype=vec3), a: wp.array(dtype=vec2), b: wp.array(dtype=vec3)
|
|
1074
|
-
):
|
|
1075
|
-
tid = wp.tid()
|
|
1076
|
-
|
|
1077
|
-
f = vec3(wp_type(0.0))
|
|
1078
|
-
b_vec = b[tid]
|
|
1079
|
-
f[0] = b_vec[1]
|
|
1080
|
-
f[2] = b_vec[0] * b_vec[1]
|
|
1081
|
-
y[tid] = f
|
|
1082
|
-
|
|
1083
|
-
g = wp_type(0.0)
|
|
1084
|
-
a_vec = a[tid]
|
|
1085
|
-
g = a_vec[0] + a_vec[1]
|
|
1086
|
-
x[tid] = g
|
|
1087
|
-
|
|
1088
|
-
def vectest_component(x: wp.array(dtype=vec3), y: wp.array(dtype=wp_type)):
|
|
1089
|
-
i = wp.tid()
|
|
1090
|
-
|
|
1091
|
-
a = vec3(wp_type(0.0))
|
|
1092
|
-
a.x = wp_type(1.0) * y[i]
|
|
1093
|
-
a.y = wp_type(2.0) * y[i]
|
|
1094
|
-
a.z = wp_type(3.0) * y[i]
|
|
1095
|
-
x[i] = a
|
|
1096
|
-
|
|
1097
|
-
kernel_read_write_store = getkernel(vectest_read_write_store, suffix=dtype.__name__)
|
|
1098
|
-
kernel_in_register = getkernel(vectest_in_register, suffix=dtype.__name__)
|
|
1099
|
-
kernel_component = getkernel(vectest_component, suffix=dtype.__name__)
|
|
1100
|
-
|
|
1101
|
-
if register_kernels:
|
|
1102
|
-
return
|
|
1103
|
-
|
|
1104
|
-
a = wp.ones(1, dtype=vec2, device=device, requires_grad=True)
|
|
1105
|
-
b = wp.ones(1, dtype=vec3, device=device, requires_grad=True)
|
|
1106
|
-
c = wp.ones(1, dtype=vec4, device=device, requires_grad=True)
|
|
1107
|
-
x = wp.full(1, value=2.0, dtype=wp_type, device=device, requires_grad=True)
|
|
1108
|
-
|
|
1109
|
-
tape = wp.Tape()
|
|
1110
|
-
with tape:
|
|
1111
|
-
wp.launch(kernel_read_write_store, dim=1, inputs=[x, a, b, c], device=device)
|
|
1112
|
-
|
|
1113
|
-
tape.backward(
|
|
1114
|
-
grads={
|
|
1115
|
-
a: wp.ones_like(a, requires_grad=False),
|
|
1116
|
-
b: wp.ones_like(b, requires_grad=False),
|
|
1117
|
-
c: wp.ones_like(c, requires_grad=False),
|
|
1118
|
-
}
|
|
1119
|
-
)
|
|
1120
|
-
|
|
1121
|
-
assert_np_equal(a.numpy(), np.array([[2.0, 1.0]], dtype=np_type))
|
|
1122
|
-
assert_np_equal(b.numpy(), np.array([[1.0, 2.0, 1.0]], dtype=np_type))
|
|
1123
|
-
assert_np_equal(c.numpy(), np.array([[1.0, 1.0, 2.0, 1.0]], dtype=np_type))
|
|
1124
|
-
assert_np_equal(x.grad.numpy(), np.array([3.0], dtype=np_type))
|
|
1125
|
-
|
|
1126
|
-
tape.reset()
|
|
1127
|
-
|
|
1128
|
-
a = wp.ones(1, dtype=vec2, device=device, requires_grad=True)
|
|
1129
|
-
b = wp.ones(1, dtype=vec3, device=device, requires_grad=True)
|
|
1130
|
-
x = wp.zeros(1, dtype=wp_type, device=device, requires_grad=True)
|
|
1131
|
-
y = wp.zeros(1, dtype=vec3, device=device, requires_grad=True)
|
|
1132
|
-
|
|
1133
|
-
with tape:
|
|
1134
|
-
wp.launch(kernel_in_register, dim=1, inputs=[x, y, a, b], device=device)
|
|
1135
|
-
|
|
1136
|
-
tape.backward(grads={x: wp.ones_like(x, requires_grad=False), y: wp.ones_like(y, requires_grad=False)})
|
|
1137
|
-
|
|
1138
|
-
assert_np_equal(x.numpy(), np.array([2.0], dtype=np_type))
|
|
1139
|
-
assert_np_equal(y.numpy(), np.array([[1.0, 0.0, 1.0]], dtype=np_type))
|
|
1140
|
-
assert_np_equal(a.grad.numpy(), np.array([[1.0, 1.0]], dtype=np_type))
|
|
1141
|
-
assert_np_equal(b.grad.numpy(), np.array([[1.0, 2.0, 0.0]], dtype=np_type))
|
|
1142
|
-
|
|
1143
|
-
tape.reset()
|
|
1144
|
-
|
|
1145
|
-
x = wp.zeros(1, dtype=vec3, device=device, requires_grad=True)
|
|
1146
|
-
y = wp.ones(1, dtype=wp_type, device=device, requires_grad=True)
|
|
1147
|
-
|
|
1148
|
-
with tape:
|
|
1149
|
-
wp.launch(kernel_component, dim=1, inputs=[x, y], device=device)
|
|
1150
|
-
|
|
1151
|
-
tape.backward(grads={x: wp.ones_like(x, requires_grad=False)})
|
|
1152
|
-
|
|
1153
|
-
assert_np_equal(x.numpy(), np.array([[1.0, 2.0, 3.0]], dtype=np_type))
|
|
1154
|
-
assert_np_equal(y.grad.numpy(), np.array([6.0], dtype=np_type))
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
1063
|
@wp.kernel
|
|
1158
1064
|
def test_vector_constructor_value_func():
|
|
1159
1065
|
a = wp.vec2()
|
|
1160
1066
|
b = wp.vector(a, dtype=wp.float16)
|
|
1161
1067
|
c = wp.vector(a)
|
|
1162
1068
|
d = wp.vector(a, length=2)
|
|
1069
|
+
e = wp.vector(1.0, 2.0, 3.0, dtype=float)
|
|
1163
1070
|
|
|
1164
1071
|
|
|
1165
1072
|
# Test matrix constructors using explicit type (float16)
|
|
@@ -1272,86 +1179,329 @@ def test_vector_len(test, device):
|
|
|
1272
1179
|
|
|
1273
1180
|
|
|
1274
1181
|
@wp.kernel
|
|
1275
|
-
def
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1182
|
+
def vec_extract_subscript(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=float)):
|
|
1183
|
+
tid = wp.tid()
|
|
1184
|
+
|
|
1185
|
+
a = x[tid]
|
|
1186
|
+
b = a[0] + 2.0 * a[1] + 3.0 * a[2]
|
|
1187
|
+
y[tid] = b
|
|
1279
1188
|
|
|
1280
|
-
v1 = wp.vec3()
|
|
1281
|
-
v2 = b[i]
|
|
1282
1189
|
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1190
|
+
@wp.kernel
|
|
1191
|
+
def vec_extract_attribute(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=float)):
|
|
1192
|
+
tid = wp.tid()
|
|
1286
1193
|
|
|
1287
|
-
a
|
|
1194
|
+
a = x[tid]
|
|
1195
|
+
b = a.x + float(2.0) * a.y + 3.0 * a.z
|
|
1196
|
+
y[tid] = b
|
|
1288
1197
|
|
|
1289
|
-
v3 = wp.vec3()
|
|
1290
|
-
v4 = d[i]
|
|
1291
1198
|
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1199
|
+
def test_vec_extract(test, device):
|
|
1200
|
+
def run(kernel):
|
|
1201
|
+
x = wp.ones(1, dtype=wp.vec3, requires_grad=True, device=device)
|
|
1202
|
+
y = wp.zeros(1, dtype=float, requires_grad=True, device=device)
|
|
1295
1203
|
|
|
1296
|
-
|
|
1204
|
+
tape = wp.Tape()
|
|
1205
|
+
with tape:
|
|
1206
|
+
wp.launch(kernel, 1, inputs=[x], outputs=[y], device=device)
|
|
1297
1207
|
|
|
1208
|
+
y.grad = wp.ones_like(y)
|
|
1209
|
+
tape.backward()
|
|
1298
1210
|
|
|
1299
|
-
|
|
1300
|
-
|
|
1211
|
+
assert_np_equal(y.numpy(), np.array([6.0], dtype=float))
|
|
1212
|
+
assert_np_equal(x.grad.numpy(), np.array([[1.0, 2.0, 3.0]], dtype=float))
|
|
1301
1213
|
|
|
1302
|
-
|
|
1303
|
-
|
|
1214
|
+
run(vec_extract_subscript)
|
|
1215
|
+
run(vec_extract_attribute)
|
|
1304
1216
|
|
|
1305
|
-
c = wp.zeros(N, dtype=wp.vec3, requires_grad=True, device=device)
|
|
1306
|
-
d = wp.ones(N, dtype=wp.vec3, requires_grad=True, device=device)
|
|
1307
1217
|
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1218
|
+
@wp.kernel
|
|
1219
|
+
def vec_assign_subscript(x: wp.array(dtype=float), y: wp.array(dtype=wp.vec3)):
|
|
1220
|
+
i = wp.tid()
|
|
1221
|
+
|
|
1222
|
+
a = wp.vec3()
|
|
1223
|
+
a[0] = 1.0 * x[i]
|
|
1224
|
+
a[1] = 2.0 * x[i]
|
|
1225
|
+
a[2] = 3.0 * x[i]
|
|
1226
|
+
y[i] = a
|
|
1227
|
+
|
|
1228
|
+
|
|
1229
|
+
@wp.kernel
|
|
1230
|
+
def vec_assign_attribute(x: wp.array(dtype=float), y: wp.array(dtype=wp.vec3)):
|
|
1231
|
+
i = wp.tid()
|
|
1232
|
+
|
|
1233
|
+
a = wp.vec3()
|
|
1234
|
+
a.x = 1.0 * x[i]
|
|
1235
|
+
a.y = 2.0 * x[i]
|
|
1236
|
+
a.z = 3.0 * x[i]
|
|
1237
|
+
y[i] = a
|
|
1238
|
+
|
|
1239
|
+
|
|
1240
|
+
def test_vec_assign(test, device):
|
|
1241
|
+
def run(kernel):
|
|
1242
|
+
x = wp.ones(1, dtype=float, requires_grad=True, device=device)
|
|
1243
|
+
y = wp.zeros(1, dtype=wp.vec3, requires_grad=True, device=device)
|
|
1244
|
+
|
|
1245
|
+
tape = wp.Tape()
|
|
1246
|
+
with tape:
|
|
1247
|
+
wp.launch(kernel, 1, inputs=[x], outputs=[y], device=device)
|
|
1311
1248
|
|
|
1312
|
-
|
|
1249
|
+
y.grad = wp.ones_like(y)
|
|
1250
|
+
tape.backward()
|
|
1313
1251
|
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
assert_np_equal(b.grad.numpy(), wp.ones_like(a).numpy())
|
|
1252
|
+
assert_np_equal(y.numpy(), np.array([[1.0, 2.0, 3.0]], dtype=float))
|
|
1253
|
+
assert_np_equal(x.grad.numpy(), np.array([6.0], dtype=float))
|
|
1317
1254
|
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
assert_np_equal(d.grad.numpy(), -wp.ones_like(d).numpy())
|
|
1255
|
+
run(vec_assign_subscript)
|
|
1256
|
+
run(vec_assign_attribute)
|
|
1321
1257
|
|
|
1322
1258
|
|
|
1323
|
-
def
|
|
1259
|
+
def test_vec_assign_copy(test, device):
|
|
1324
1260
|
saved_enable_vector_component_overwrites_setting = wp.config.enable_vector_component_overwrites
|
|
1325
1261
|
try:
|
|
1326
1262
|
wp.config.enable_vector_component_overwrites = True
|
|
1327
1263
|
|
|
1328
1264
|
@wp.kernel
|
|
1329
|
-
def
|
|
1265
|
+
def vec_assign_overwrite(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
|
|
1330
1266
|
tid = wp.tid()
|
|
1331
1267
|
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1268
|
+
a = wp.vec3()
|
|
1269
|
+
b = x[tid]
|
|
1270
|
+
a = b
|
|
1271
|
+
a[1] = 3.0
|
|
1336
1272
|
|
|
1337
|
-
|
|
1273
|
+
y[tid] = a
|
|
1338
1274
|
|
|
1339
|
-
x = wp.
|
|
1340
|
-
|
|
1275
|
+
x = wp.ones(1, dtype=wp.vec3, device=device, requires_grad=True)
|
|
1276
|
+
y = wp.zeros(1, dtype=wp.vec3, device=device, requires_grad=True)
|
|
1341
1277
|
|
|
1342
1278
|
tape = wp.Tape()
|
|
1343
1279
|
with tape:
|
|
1344
|
-
wp.launch(
|
|
1280
|
+
wp.launch(vec_assign_overwrite, dim=1, inputs=[x, y], device=device)
|
|
1345
1281
|
|
|
1346
|
-
|
|
1282
|
+
y.grad = wp.ones_like(y, requires_grad=False)
|
|
1283
|
+
tape.backward()
|
|
1347
1284
|
|
|
1348
|
-
assert_np_equal(
|
|
1349
|
-
assert_np_equal(
|
|
1285
|
+
assert_np_equal(y.numpy(), np.array([[1.0, 3.0, 1.0]], dtype=float))
|
|
1286
|
+
assert_np_equal(x.grad.numpy(), np.array([[1.0, 0.0, 1.0]], dtype=float))
|
|
1350
1287
|
|
|
1351
1288
|
finally:
|
|
1352
1289
|
wp.config.enable_vector_component_overwrites = saved_enable_vector_component_overwrites_setting
|
|
1353
1290
|
|
|
1354
1291
|
|
|
1292
|
+
@wp.kernel
|
|
1293
|
+
def vec_array_extract_subscript(x: wp.array2d(dtype=wp.vec3), y: wp.array2d(dtype=float)):
|
|
1294
|
+
i, j = wp.tid()
|
|
1295
|
+
a = x[i, j][0]
|
|
1296
|
+
b = x[i, j][1]
|
|
1297
|
+
c = x[i, j][2]
|
|
1298
|
+
y[i, j] = 1.0 * a + 2.0 * b + 3.0 * c
|
|
1299
|
+
|
|
1300
|
+
|
|
1301
|
+
@wp.kernel
|
|
1302
|
+
def vec_array_extract_attribute(x: wp.array2d(dtype=wp.vec3), y: wp.array2d(dtype=float)):
|
|
1303
|
+
i, j = wp.tid()
|
|
1304
|
+
a = x[i, j].x
|
|
1305
|
+
b = x[i, j].y
|
|
1306
|
+
c = x[i, j].z
|
|
1307
|
+
y[i, j] = 1.0 * a + 2.0 * b + 3.0 * c
|
|
1308
|
+
|
|
1309
|
+
|
|
1310
|
+
def test_vec_array_extract(test, device):
|
|
1311
|
+
def run(kernel):
|
|
1312
|
+
x = wp.ones((1, 1), dtype=wp.vec3, requires_grad=True, device=device)
|
|
1313
|
+
y = wp.zeros((1, 1), dtype=float, requires_grad=True, device=device)
|
|
1314
|
+
|
|
1315
|
+
tape = wp.Tape()
|
|
1316
|
+
with tape:
|
|
1317
|
+
wp.launch(kernel, (1, 1), inputs=[x], outputs=[y], device=device)
|
|
1318
|
+
|
|
1319
|
+
y.grad = wp.ones_like(y)
|
|
1320
|
+
tape.backward()
|
|
1321
|
+
|
|
1322
|
+
assert_np_equal(y.numpy(), np.array([[6.0]], dtype=float))
|
|
1323
|
+
assert_np_equal(x.grad.numpy(), np.array([[[1.0, 2.0, 3.0]]], dtype=float))
|
|
1324
|
+
|
|
1325
|
+
run(vec_array_extract_subscript)
|
|
1326
|
+
run(vec_array_extract_attribute)
|
|
1327
|
+
|
|
1328
|
+
|
|
1329
|
+
@wp.kernel
|
|
1330
|
+
def vec_array_assign_subscript(x: wp.array2d(dtype=float), y: wp.array2d(dtype=wp.vec3)):
|
|
1331
|
+
i, j = wp.tid()
|
|
1332
|
+
|
|
1333
|
+
y[i, j][0] = 1.0 * x[i, j]
|
|
1334
|
+
y[i, j][1] = 2.0 * x[i, j]
|
|
1335
|
+
y[i, j][2] = 3.0 * x[i, j]
|
|
1336
|
+
|
|
1337
|
+
|
|
1338
|
+
@wp.kernel
|
|
1339
|
+
def vec_array_assign_attribute(x: wp.array2d(dtype=float), y: wp.array2d(dtype=wp.vec3)):
|
|
1340
|
+
i, j = wp.tid()
|
|
1341
|
+
|
|
1342
|
+
y[i, j].x = 1.0 * x[i, j]
|
|
1343
|
+
y[i, j].y = 2.0 * x[i, j]
|
|
1344
|
+
y[i, j].z = 3.0 * x[i, j]
|
|
1345
|
+
|
|
1346
|
+
|
|
1347
|
+
def test_vec_array_assign(test, device):
|
|
1348
|
+
def run(kernel):
|
|
1349
|
+
x = wp.ones((1, 1), dtype=float, requires_grad=True, device=device)
|
|
1350
|
+
y = wp.zeros((1, 1), dtype=wp.vec3, requires_grad=True, device=device)
|
|
1351
|
+
|
|
1352
|
+
tape = wp.Tape()
|
|
1353
|
+
with tape:
|
|
1354
|
+
wp.launch(kernel, (1, 1), inputs=[x], outputs=[y], device=device)
|
|
1355
|
+
|
|
1356
|
+
y.grad = wp.ones_like(y)
|
|
1357
|
+
tape.backward()
|
|
1358
|
+
|
|
1359
|
+
assert_np_equal(y.numpy(), np.array([[[1.0, 2.0, 3.0]]], dtype=float))
|
|
1360
|
+
# TODO: gradient propagation for in-place array assignment
|
|
1361
|
+
# assert_np_equal(x.grad.numpy(), np.array([[6.0]], dtype=float))
|
|
1362
|
+
|
|
1363
|
+
run(vec_array_assign_subscript)
|
|
1364
|
+
run(vec_array_assign_attribute)
|
|
1365
|
+
|
|
1366
|
+
|
|
1367
|
+
@wp.kernel
|
|
1368
|
+
def vec_add_inplace_subscript(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
|
|
1369
|
+
i = wp.tid()
|
|
1370
|
+
|
|
1371
|
+
a = wp.vec3()
|
|
1372
|
+
b = x[i]
|
|
1373
|
+
|
|
1374
|
+
a[0] += 1.0 * b[0]
|
|
1375
|
+
a[1] += 2.0 * b[1]
|
|
1376
|
+
a[2] += 3.0 * b[2]
|
|
1377
|
+
|
|
1378
|
+
y[i] = a
|
|
1379
|
+
|
|
1380
|
+
|
|
1381
|
+
@wp.kernel
|
|
1382
|
+
def vec_add_inplace_attribute(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
|
|
1383
|
+
i = wp.tid()
|
|
1384
|
+
|
|
1385
|
+
a = wp.vec3()
|
|
1386
|
+
b = x[i]
|
|
1387
|
+
|
|
1388
|
+
a.x += 1.0 * b.x
|
|
1389
|
+
a.y += 2.0 * b.y
|
|
1390
|
+
a.z += 3.0 * b.z
|
|
1391
|
+
|
|
1392
|
+
y[i] = a
|
|
1393
|
+
|
|
1394
|
+
|
|
1395
|
+
def test_vec_add_inplace(test, device):
|
|
1396
|
+
def run(kernel):
|
|
1397
|
+
x = wp.ones(1, dtype=wp.vec3, requires_grad=True, device=device)
|
|
1398
|
+
y = wp.zeros(1, dtype=wp.vec3, requires_grad=True, device=device)
|
|
1399
|
+
|
|
1400
|
+
tape = wp.Tape()
|
|
1401
|
+
with tape:
|
|
1402
|
+
wp.launch(kernel, 1, inputs=[x], outputs=[y], device=device)
|
|
1403
|
+
|
|
1404
|
+
y.grad = wp.ones_like(y)
|
|
1405
|
+
tape.backward()
|
|
1406
|
+
|
|
1407
|
+
assert_np_equal(y.numpy(), np.array([[1.0, 2.0, 3.0]], dtype=float))
|
|
1408
|
+
assert_np_equal(x.grad.numpy(), np.array([[1.0, 2.0, 3.0]], dtype=float))
|
|
1409
|
+
|
|
1410
|
+
run(vec_add_inplace_subscript)
|
|
1411
|
+
run(vec_add_inplace_attribute)
|
|
1412
|
+
|
|
1413
|
+
|
|
1414
|
+
@wp.kernel
|
|
1415
|
+
def vec_sub_inplace_subscript(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
|
|
1416
|
+
i = wp.tid()
|
|
1417
|
+
|
|
1418
|
+
a = wp.vec3()
|
|
1419
|
+
b = x[i]
|
|
1420
|
+
|
|
1421
|
+
a[0] -= 1.0 * b[0]
|
|
1422
|
+
a[1] -= 2.0 * b[1]
|
|
1423
|
+
a[2] -= 3.0 * b[2]
|
|
1424
|
+
|
|
1425
|
+
y[i] = a
|
|
1426
|
+
|
|
1427
|
+
|
|
1428
|
+
@wp.kernel
|
|
1429
|
+
def vec_sub_inplace_attribute(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
|
|
1430
|
+
i = wp.tid()
|
|
1431
|
+
|
|
1432
|
+
a = wp.vec3()
|
|
1433
|
+
b = x[i]
|
|
1434
|
+
|
|
1435
|
+
a.x -= 1.0 * b.x
|
|
1436
|
+
a.y -= 2.0 * b.y
|
|
1437
|
+
a.z -= 3.0 * b.z
|
|
1438
|
+
|
|
1439
|
+
y[i] = a
|
|
1440
|
+
|
|
1441
|
+
|
|
1442
|
+
def test_vec_sub_inplace(test, device):
|
|
1443
|
+
def run(kernel):
|
|
1444
|
+
x = wp.ones(1, dtype=wp.vec3, requires_grad=True, device=device)
|
|
1445
|
+
y = wp.zeros(1, dtype=wp.vec3, requires_grad=True, device=device)
|
|
1446
|
+
|
|
1447
|
+
tape = wp.Tape()
|
|
1448
|
+
with tape:
|
|
1449
|
+
wp.launch(kernel, 1, inputs=[x], outputs=[y], device=device)
|
|
1450
|
+
|
|
1451
|
+
y.grad = wp.ones_like(y)
|
|
1452
|
+
tape.backward()
|
|
1453
|
+
|
|
1454
|
+
assert_np_equal(y.numpy(), np.array([[-1.0, -2.0, -3.0]], dtype=float))
|
|
1455
|
+
assert_np_equal(x.grad.numpy(), np.array([[-1.0, -2.0, -3.0]], dtype=float))
|
|
1456
|
+
|
|
1457
|
+
run(vec_sub_inplace_subscript)
|
|
1458
|
+
run(vec_sub_inplace_attribute)
|
|
1459
|
+
|
|
1460
|
+
|
|
1461
|
+
@wp.kernel
|
|
1462
|
+
def vec_array_add_inplace(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
|
|
1463
|
+
i = wp.tid()
|
|
1464
|
+
|
|
1465
|
+
y[i] += x[i]
|
|
1466
|
+
|
|
1467
|
+
|
|
1468
|
+
def test_vec_array_add_inplace(test, device):
|
|
1469
|
+
x = wp.ones(1, dtype=wp.vec3, requires_grad=True, device=device)
|
|
1470
|
+
y = wp.zeros(1, dtype=wp.vec3, requires_grad=True, device=device)
|
|
1471
|
+
|
|
1472
|
+
tape = wp.Tape()
|
|
1473
|
+
with tape:
|
|
1474
|
+
wp.launch(vec_array_add_inplace, 1, inputs=[x], outputs=[y], device=device)
|
|
1475
|
+
|
|
1476
|
+
y.grad = wp.ones_like(y)
|
|
1477
|
+
tape.backward()
|
|
1478
|
+
|
|
1479
|
+
assert_np_equal(y.numpy(), np.array([[1.0, 1.0, 1.0]], dtype=float))
|
|
1480
|
+
assert_np_equal(x.grad.numpy(), np.array([[1.0, 1.0, 1.0]], dtype=float))
|
|
1481
|
+
|
|
1482
|
+
|
|
1483
|
+
@wp.kernel
|
|
1484
|
+
def vec_array_sub_inplace(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
|
|
1485
|
+
i = wp.tid()
|
|
1486
|
+
|
|
1487
|
+
y[i] -= x[i]
|
|
1488
|
+
|
|
1489
|
+
|
|
1490
|
+
def test_vec_array_sub_inplace(test, device):
|
|
1491
|
+
x = wp.ones(1, dtype=wp.vec3, requires_grad=True, device=device)
|
|
1492
|
+
y = wp.zeros(1, dtype=wp.vec3, requires_grad=True, device=device)
|
|
1493
|
+
|
|
1494
|
+
tape = wp.Tape()
|
|
1495
|
+
with tape:
|
|
1496
|
+
wp.launch(vec_array_sub_inplace, 1, inputs=[x], outputs=[y], device=device)
|
|
1497
|
+
|
|
1498
|
+
y.grad = wp.ones_like(y)
|
|
1499
|
+
tape.backward()
|
|
1500
|
+
|
|
1501
|
+
assert_np_equal(y.numpy(), np.array([[-1.0, -1.0, -1.0]], dtype=float))
|
|
1502
|
+
assert_np_equal(x.grad.numpy(), np.array([[-1.0, -1.0, -1.0]], dtype=float))
|
|
1503
|
+
|
|
1504
|
+
|
|
1355
1505
|
devices = get_test_devices()
|
|
1356
1506
|
|
|
1357
1507
|
|
|
@@ -1418,14 +1568,13 @@ for dtype in np_float_types:
|
|
|
1418
1568
|
devices=devices,
|
|
1419
1569
|
dtype=dtype,
|
|
1420
1570
|
)
|
|
1421
|
-
add_function_test_register_kernel(
|
|
1422
|
-
TestVec,
|
|
1423
|
-
f"test_vector_assign_inplace_{dtype.__name__}",
|
|
1424
|
-
test_vector_assign_inplace,
|
|
1425
|
-
devices=devices,
|
|
1426
|
-
dtype=dtype,
|
|
1427
|
-
)
|
|
1428
1571
|
|
|
1572
|
+
add_function_test(
|
|
1573
|
+
TestVec,
|
|
1574
|
+
"test_length_mismatch",
|
|
1575
|
+
test_length_mismatch,
|
|
1576
|
+
devices=devices,
|
|
1577
|
+
)
|
|
1429
1578
|
add_function_test(
|
|
1430
1579
|
TestVec,
|
|
1431
1580
|
"test_anon_constructor_error_length_mismatch",
|
|
@@ -1468,18 +1617,15 @@ add_function_test(
|
|
|
1468
1617
|
test_vector_len,
|
|
1469
1618
|
devices=devices,
|
|
1470
1619
|
)
|
|
1471
|
-
add_function_test(
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
)
|
|
1477
|
-
add_function_test(
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
test_vector_assign_copy,
|
|
1481
|
-
devices=devices,
|
|
1482
|
-
)
|
|
1620
|
+
add_function_test(TestVec, "test_vec_extract", test_vec_extract, devices=devices)
|
|
1621
|
+
add_function_test(TestVec, "test_vec_assign", test_vec_assign, devices=devices)
|
|
1622
|
+
add_function_test(TestVec, "test_vec_assign_copy", test_vec_assign_copy, devices=devices)
|
|
1623
|
+
add_function_test(TestVec, "test_vec_array_extract", test_vec_array_extract, devices=devices)
|
|
1624
|
+
add_function_test(TestVec, "test_vec_array_assign", test_vec_array_assign, devices=devices)
|
|
1625
|
+
add_function_test(TestVec, "test_vec_add_inplace", test_vec_add_inplace, devices=devices)
|
|
1626
|
+
add_function_test(TestVec, "test_vec_sub_inplace", test_vec_sub_inplace, devices=devices)
|
|
1627
|
+
add_function_test(TestVec, "test_vec_array_add_inplace", test_vec_array_add_inplace, devices=devices)
|
|
1628
|
+
add_function_test(TestVec, "test_vec_array_sub_inplace", test_vec_array_sub_inplace, devices=devices)
|
|
1483
1629
|
|
|
1484
1630
|
|
|
1485
1631
|
if __name__ == "__main__":
|
|
@@ -184,6 +184,96 @@ def test_tile_load_unaligned(test, device):
|
|
|
184
184
|
assert_np_equal(input.grad.numpy(), expected_grad)
|
|
185
185
|
|
|
186
186
|
|
|
187
|
+
@wp.kernel
|
|
188
|
+
def tile_load_aligned_small_kernel(
|
|
189
|
+
input: wp.array2d(dtype=float),
|
|
190
|
+
output: wp.array2d(dtype=float),
|
|
191
|
+
):
|
|
192
|
+
t = wp.tile_load(input, shape=(3, 3), offset=(0, 0), storage="shared")
|
|
193
|
+
wp.tile_store(output, t, offset=(0, 0))
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
# regression test for tiles that are smaller than sizeof(float4) in the last
|
|
197
|
+
# dimension but are aligned to float4. This previously triggered the fast float4 path by accident.
|
|
198
|
+
def test_tile_load_aligned_small(test, device):
|
|
199
|
+
rng = np.random.default_rng(42)
|
|
200
|
+
|
|
201
|
+
shape = [TILE_M, TILE_N]
|
|
202
|
+
|
|
203
|
+
input = wp.array(rng.random(shape), dtype=float, requires_grad=True, device=device)
|
|
204
|
+
output = wp.zeros(shape, dtype=float, device=device)
|
|
205
|
+
|
|
206
|
+
wp.launch_tiled(
|
|
207
|
+
tile_load_aligned_small_kernel,
|
|
208
|
+
dim=[1],
|
|
209
|
+
inputs=[input, output],
|
|
210
|
+
block_dim=TILE_DIM,
|
|
211
|
+
device=device,
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
# zeros except for the 3x3 tile at 0, 0
|
|
215
|
+
assert_np_equal(output.numpy()[3:, :], np.zeros((TILE_M - 3, TILE_N)))
|
|
216
|
+
assert_np_equal(output.numpy()[:, 3:], np.zeros((TILE_M, TILE_N - 3)))
|
|
217
|
+
|
|
218
|
+
# check output elements
|
|
219
|
+
assert_np_equal(output.numpy()[:3, :3], input.numpy()[:3, :3])
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
TILE_WIDTH = 5
|
|
223
|
+
TILE_OFFSET_X = 0
|
|
224
|
+
TILE_OFFSET_Y = 8
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
@wp.kernel
|
|
228
|
+
def test_tile_load_aligned_offset_unaligned_size_kernel(
|
|
229
|
+
input: wp.array2d(dtype=float),
|
|
230
|
+
output: wp.array2d(dtype=float),
|
|
231
|
+
):
|
|
232
|
+
# Load a 5x5 tile from the input array starting at offset (0,8)
|
|
233
|
+
# and store it in shared memory
|
|
234
|
+
tile = wp.tile_load(input, shape=(TILE_WIDTH, TILE_WIDTH), offset=(TILE_OFFSET_X, TILE_OFFSET_Y), storage="shared")
|
|
235
|
+
|
|
236
|
+
# Store the loaded tile back to the output array at the same offset
|
|
237
|
+
wp.tile_store(output, tile, offset=(TILE_OFFSET_X, TILE_OFFSET_Y))
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def test_tile_load_aligned_offset_unaligned_size(test, device):
|
|
241
|
+
"""Test loading a tile with aligned offset but unaligned size."""
|
|
242
|
+
|
|
243
|
+
rng = np.random.default_rng(42)
|
|
244
|
+
array_shape = [TILE_N, TILE_M]
|
|
245
|
+
|
|
246
|
+
input_array = wp.array(rng.random(array_shape), dtype=float, requires_grad=True, device=device)
|
|
247
|
+
output_array = wp.zeros(array_shape, dtype=float, device=device)
|
|
248
|
+
|
|
249
|
+
wp.launch_tiled(
|
|
250
|
+
test_tile_load_aligned_offset_unaligned_size_kernel,
|
|
251
|
+
dim=[1],
|
|
252
|
+
inputs=[input_array, output_array],
|
|
253
|
+
block_dim=TILE_DIM,
|
|
254
|
+
device=device,
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
# Region before the tile offset should be zeros
|
|
258
|
+
assert_np_equal(output_array.numpy()[:TILE_WIDTH, :TILE_OFFSET_Y], np.zeros((TILE_WIDTH, TILE_OFFSET_Y)))
|
|
259
|
+
|
|
260
|
+
# Region where the tile was loaded/stored should match input
|
|
261
|
+
assert_np_equal(
|
|
262
|
+
output_array.numpy()[:TILE_WIDTH, TILE_OFFSET_Y : TILE_OFFSET_Y + TILE_WIDTH],
|
|
263
|
+
input_array.numpy()[:TILE_WIDTH, TILE_OFFSET_Y : TILE_OFFSET_Y + TILE_WIDTH],
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
# Region after the tile should be zeros
|
|
267
|
+
remaining_width = TILE_M - (TILE_OFFSET_Y + TILE_WIDTH)
|
|
268
|
+
assert_np_equal(
|
|
269
|
+
output_array.numpy()[:TILE_WIDTH, TILE_OFFSET_Y + TILE_WIDTH :], np.zeros((TILE_WIDTH, remaining_width))
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
# Rows below the tile should all be zeros
|
|
273
|
+
remaining_height = TILE_N - TILE_WIDTH
|
|
274
|
+
assert_np_equal(output_array.numpy()[TILE_WIDTH:, :], np.zeros((remaining_height, TILE_M)))
|
|
275
|
+
|
|
276
|
+
|
|
187
277
|
# ----------------------------------------------------------------------------------------
|
|
188
278
|
|
|
189
279
|
TILE_SIZE = 4
|
|
@@ -388,6 +478,13 @@ add_function_test(TestTileLoad, "test_tile_load_2d", test_tile_load(tile_load_2d
|
|
|
388
478
|
add_function_test(TestTileLoad, "test_tile_load_3d", test_tile_load(tile_load_3d_kernel, 3), devices=devices)
|
|
389
479
|
add_function_test(TestTileLoad, "test_tile_load_4d", test_tile_load(tile_load_4d_kernel, 4), devices=devices)
|
|
390
480
|
add_function_test(TestTileLoad, "test_tile_load_unaligned", test_tile_load_unaligned, devices=devices)
|
|
481
|
+
add_function_test(TestTileLoad, "test_tile_load_aligned_small", test_tile_load_aligned_small, devices=devices)
|
|
482
|
+
add_function_test(
|
|
483
|
+
TestTileLoad,
|
|
484
|
+
"test_tile_load_aligned_offset_unaligned_size",
|
|
485
|
+
test_tile_load_aligned_offset_unaligned_size,
|
|
486
|
+
devices=devices,
|
|
487
|
+
)
|
|
391
488
|
|
|
392
489
|
add_function_test(TestTileLoad, "test_tile_extract_1d", test_tile_extract(tile_extract_1d_kernel, 1), devices=devices)
|
|
393
490
|
add_function_test(TestTileLoad, "test_tile_extract_2d", test_tile_extract(tile_extract_2d_kernel, 2), devices=devices)
|