warp-lang 1.3.3__py3-none-macosx_10_13_universal2.whl → 1.4.1__py3-none-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +6 -0
- warp/autograd.py +59 -6
- warp/bin/libwarp.dylib +0 -0
- warp/build_dll.py +8 -10
- warp/builtins.py +103 -3
- warp/codegen.py +447 -53
- warp/config.py +1 -1
- warp/context.py +682 -405
- warp/dlpack.py +2 -0
- warp/examples/benchmarks/benchmark_cloth.py +10 -0
- warp/examples/core/example_render_opengl.py +12 -10
- warp/examples/fem/example_adaptive_grid.py +251 -0
- warp/examples/fem/example_apic_fluid.py +1 -1
- warp/examples/fem/example_diffusion_3d.py +2 -2
- warp/examples/fem/example_magnetostatics.py +1 -1
- warp/examples/fem/example_streamlines.py +1 -0
- warp/examples/fem/utils.py +25 -5
- warp/examples/sim/example_cloth.py +50 -6
- warp/fem/__init__.py +2 -0
- warp/fem/adaptivity.py +493 -0
- warp/fem/field/field.py +2 -1
- warp/fem/field/nodal_field.py +18 -26
- warp/fem/field/test.py +4 -4
- warp/fem/field/trial.py +4 -4
- warp/fem/geometry/__init__.py +1 -0
- warp/fem/geometry/adaptive_nanogrid.py +843 -0
- warp/fem/geometry/nanogrid.py +55 -28
- warp/fem/space/__init__.py +1 -1
- warp/fem/space/nanogrid_function_space.py +69 -35
- warp/fem/utils.py +118 -107
- warp/jax_experimental.py +28 -15
- warp/native/array.h +0 -1
- warp/native/builtin.h +103 -6
- warp/native/bvh.cu +4 -2
- warp/native/cuda_util.cpp +14 -0
- warp/native/cuda_util.h +2 -0
- warp/native/error.cpp +4 -2
- warp/native/exports.h +99 -0
- warp/native/mat.h +97 -0
- warp/native/mesh.cpp +36 -0
- warp/native/mesh.cu +52 -1
- warp/native/mesh.h +1 -0
- warp/native/quat.h +43 -0
- warp/native/range.h +11 -2
- warp/native/spatial.h +6 -0
- warp/native/vec.h +74 -0
- warp/native/warp.cpp +2 -1
- warp/native/warp.cu +10 -3
- warp/native/warp.h +8 -1
- warp/paddle.py +382 -0
- warp/sim/__init__.py +1 -0
- warp/sim/collide.py +519 -0
- warp/sim/integrator_euler.py +18 -5
- warp/sim/integrator_featherstone.py +5 -5
- warp/sim/integrator_vbd.py +1026 -0
- warp/sim/integrator_xpbd.py +2 -6
- warp/sim/model.py +50 -25
- warp/sparse.py +9 -7
- warp/stubs.py +459 -0
- warp/tape.py +2 -0
- warp/tests/aux_test_dependent.py +1 -0
- warp/tests/aux_test_name_clash1.py +32 -0
- warp/tests/aux_test_name_clash2.py +32 -0
- warp/tests/aux_test_square.py +1 -0
- warp/tests/test_array.py +188 -0
- warp/tests/test_async.py +3 -3
- warp/tests/test_atomic.py +6 -0
- warp/tests/test_closest_point_edge_edge.py +93 -1
- warp/tests/test_codegen.py +93 -15
- warp/tests/test_codegen_instancing.py +1457 -0
- warp/tests/test_collision.py +486 -0
- warp/tests/test_compile_consts.py +3 -28
- warp/tests/test_dlpack.py +170 -0
- warp/tests/test_examples.py +22 -8
- warp/tests/test_fast_math.py +10 -4
- warp/tests/test_fem.py +81 -1
- warp/tests/test_func.py +46 -0
- warp/tests/test_implicit_init.py +49 -0
- warp/tests/test_jax.py +58 -0
- warp/tests/test_mat.py +84 -0
- warp/tests/test_mesh_query_point.py +188 -0
- warp/tests/test_model.py +13 -0
- warp/tests/test_module_hashing.py +40 -0
- warp/tests/test_multigpu.py +3 -3
- warp/tests/test_overwrite.py +8 -0
- warp/tests/test_paddle.py +852 -0
- warp/tests/test_print.py +89 -0
- warp/tests/test_quat.py +111 -0
- warp/tests/test_reload.py +31 -1
- warp/tests/test_scalar_ops.py +2 -0
- warp/tests/test_static.py +568 -0
- warp/tests/test_streams.py +64 -3
- warp/tests/test_struct.py +4 -4
- warp/tests/test_torch.py +24 -0
- warp/tests/test_triangle_closest_point.py +137 -0
- warp/tests/test_types.py +1 -1
- warp/tests/test_vbd.py +386 -0
- warp/tests/test_vec.py +143 -0
- warp/tests/test_vec_scalar_ops.py +139 -0
- warp/tests/unittest_suites.py +12 -0
- warp/tests/unittest_utils.py +9 -5
- warp/thirdparty/dlpack.py +3 -1
- warp/types.py +167 -36
- warp/utils.py +37 -14
- {warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/METADATA +10 -8
- {warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/RECORD +109 -97
- warp/tests/test_point_triangle_closest_point.py +0 -143
- {warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/WHEEL +0 -0
- {warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/top_level.txt +0 -0
warp/jax_experimental.py
CHANGED
|
@@ -21,17 +21,22 @@ _registered_kernels = [None]
|
|
|
21
21
|
_registered_kernel_to_id = {}
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
def jax_kernel(wp_kernel):
|
|
24
|
+
def jax_kernel(wp_kernel, launch_dims=None):
|
|
25
25
|
"""Create a Jax primitive from a Warp kernel.
|
|
26
26
|
|
|
27
27
|
NOTE: This is an experimental feature under development.
|
|
28
28
|
|
|
29
|
+
Args:
|
|
30
|
+
wp_kernel: The Warp kernel to be wrapped.
|
|
31
|
+
launch_dims: Optional. Specify the kernel launch dimensions. If None,
|
|
32
|
+
dimensions are inferred from the shape of the first argument.
|
|
33
|
+
This option when set will specify the output dimensions.
|
|
34
|
+
|
|
29
35
|
Current limitations:
|
|
30
36
|
- All kernel arguments must be arrays.
|
|
31
|
-
-
|
|
37
|
+
- If launch_dims is not provided, kernel launch dimensions are inferred from the shape of the first argument.
|
|
32
38
|
- Input arguments are followed by output arguments in the Warp kernel definition.
|
|
33
39
|
- There must be at least one input argument and at least one output argument.
|
|
34
|
-
- Output shapes must match the launch dimensions (i.e., output shapes must match the shape of the first argument).
|
|
35
40
|
- All arrays must be contiguous.
|
|
36
41
|
- Only the CUDA backend is supported.
|
|
37
42
|
"""
|
|
@@ -47,7 +52,7 @@ def jax_kernel(wp_kernel):
|
|
|
47
52
|
id = _registered_kernel_to_id[wp_kernel]
|
|
48
53
|
|
|
49
54
|
def bind(*args):
|
|
50
|
-
return _jax_warp_p.bind(*args, kernel=id)
|
|
55
|
+
return _jax_warp_p.bind(*args, kernel=id, launch_dims=launch_dims)
|
|
51
56
|
|
|
52
57
|
return bind
|
|
53
58
|
|
|
@@ -106,7 +111,7 @@ def _get_jax_device():
|
|
|
106
111
|
device = jax.config.jax_default_device
|
|
107
112
|
# if default device is not set, use first device
|
|
108
113
|
if device is None:
|
|
109
|
-
device = jax.
|
|
114
|
+
device = jax.local_devices()[0]
|
|
110
115
|
return device
|
|
111
116
|
|
|
112
117
|
|
|
@@ -223,12 +228,17 @@ def _create_jax_warp_primitive():
|
|
|
223
228
|
raise TypeError(f"Invalid or unsupported data type: {jax_ir_type}")
|
|
224
229
|
|
|
225
230
|
# Abstract evaluation.
|
|
226
|
-
def jax_warp_abstract(*args, kernel=None):
|
|
231
|
+
def jax_warp_abstract(*args, kernel=None, launch_dims=None):
|
|
227
232
|
wp_kernel = _registered_kernels[kernel]
|
|
228
233
|
# All the extra arguments to the warp kernel are outputs.
|
|
229
234
|
warp_outputs = [o.type for o in wp_kernel.adj.args[len(args) :]]
|
|
230
|
-
|
|
231
|
-
|
|
235
|
+
|
|
236
|
+
if launch_dims is None:
|
|
237
|
+
# Use the first input dimension to infer the output's dimensions if launch_dims is not provided
|
|
238
|
+
dims = strip_vecmat_dimensions(wp_kernel.adj.args[0], list(args[0].shape))
|
|
239
|
+
else:
|
|
240
|
+
dims = launch_dims
|
|
241
|
+
|
|
232
242
|
jax_outputs = []
|
|
233
243
|
for o in warp_outputs:
|
|
234
244
|
shape = list(dims) + list(get_vecmat_shape(o))
|
|
@@ -260,7 +270,7 @@ def _create_jax_warp_primitive():
|
|
|
260
270
|
def default_layout(shape):
|
|
261
271
|
return range(len(shape) - 1, -1, -1)
|
|
262
272
|
|
|
263
|
-
def warp_call_lowering(ctx, *args, kernel=None):
|
|
273
|
+
def warp_call_lowering(ctx, *args, kernel=None, launch_dims=None):
|
|
264
274
|
if not kernel:
|
|
265
275
|
raise Exception("Unknown kernel id " + str(kernel))
|
|
266
276
|
wp_kernel = _registered_kernels[kernel]
|
|
@@ -272,12 +282,15 @@ def _create_jax_warp_primitive():
|
|
|
272
282
|
if not module.load(device):
|
|
273
283
|
raise Exception("Could not load kernel on device")
|
|
274
284
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
285
|
+
if launch_dims is None:
|
|
286
|
+
# Infer dimensions from the first input.
|
|
287
|
+
warp_arg0 = wp_kernel.adj.args[0]
|
|
288
|
+
actual_shape0 = ir.RankedTensorType(args[0].type).shape
|
|
289
|
+
dims = strip_vecmat_dimensions(warp_arg0, actual_shape0)
|
|
290
|
+
warp_dims = collapse_into_leading_dimension(warp_arg0, dims)
|
|
291
|
+
else:
|
|
292
|
+
dims = launch_dims
|
|
293
|
+
warp_dims = launch_dims
|
|
281
294
|
# Figure out the types and shapes of the input arrays.
|
|
282
295
|
arg_strings = []
|
|
283
296
|
operand_layouts = []
|
warp/native/array.h
CHANGED
|
@@ -938,7 +938,6 @@ inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, in
|
|
|
938
938
|
FP_VERIFY_ADJ_4(value, adj_value)
|
|
939
939
|
}
|
|
940
940
|
|
|
941
|
-
|
|
942
941
|
template<typename T>
|
|
943
942
|
inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, T value, const array_t<T>& adj_buf, int& adj_i, T& adj_value, const T& adj_ret)
|
|
944
943
|
{
|
warp/native/builtin.h
CHANGED
|
@@ -1240,7 +1240,7 @@ inline CUDA_CALLABLE float16 atomic_add(float16* buf, float16 value)
|
|
|
1240
1240
|
|
|
1241
1241
|
}
|
|
1242
1242
|
|
|
1243
|
-
// emulate atomic float max
|
|
1243
|
+
// emulate atomic float max with atomicCAS()
|
|
1244
1244
|
inline CUDA_CALLABLE float atomic_max(float* address, float val)
|
|
1245
1245
|
{
|
|
1246
1246
|
#if defined(__CUDA_ARCH__)
|
|
@@ -1263,7 +1263,7 @@ inline CUDA_CALLABLE float atomic_max(float* address, float val)
|
|
|
1263
1263
|
#endif
|
|
1264
1264
|
}
|
|
1265
1265
|
|
|
1266
|
-
// emulate atomic float min
|
|
1266
|
+
// emulate atomic float min with atomicCAS()
|
|
1267
1267
|
inline CUDA_CALLABLE float atomic_min(float* address, float val)
|
|
1268
1268
|
{
|
|
1269
1269
|
#if defined(__CUDA_ARCH__)
|
|
@@ -1286,6 +1286,88 @@ inline CUDA_CALLABLE float atomic_min(float* address, float val)
|
|
|
1286
1286
|
#endif
|
|
1287
1287
|
}
|
|
1288
1288
|
|
|
1289
|
+
template<>
|
|
1290
|
+
inline CUDA_CALLABLE float64 atomic_add(float64* buf, float64 value)
|
|
1291
|
+
{
|
|
1292
|
+
#if !defined(__CUDA_ARCH__)
|
|
1293
|
+
float64 old = buf[0];
|
|
1294
|
+
buf[0] += value;
|
|
1295
|
+
return old;
|
|
1296
|
+
#elif defined(__clang__) // CUDA compiled by Clang
|
|
1297
|
+
return atomicAdd(buf, value);
|
|
1298
|
+
#else // CUDA compiled by NVRTC
|
|
1299
|
+
|
|
1300
|
+
/* Define __PTR for atomicAdd prototypes below, undef after done */
|
|
1301
|
+
#if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__) || defined(__CUDACC_RTC__)
|
|
1302
|
+
#define __PTR "l"
|
|
1303
|
+
#else
|
|
1304
|
+
#define __PTR "r"
|
|
1305
|
+
#endif /*(defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__) || defined(__CUDACC_RTC__)*/
|
|
1306
|
+
|
|
1307
|
+
double r = 0.0;
|
|
1308
|
+
|
|
1309
|
+
#if __CUDA_ARCH__ >= 600
|
|
1310
|
+
|
|
1311
|
+
asm volatile ("{ atom.add.f64 %0,[%1],%2; }\n"
|
|
1312
|
+
: "=d"(r)
|
|
1313
|
+
: __PTR(buf), "d"(value)
|
|
1314
|
+
: "memory");
|
|
1315
|
+
#endif
|
|
1316
|
+
|
|
1317
|
+
return r;
|
|
1318
|
+
|
|
1319
|
+
#undef __PTR
|
|
1320
|
+
|
|
1321
|
+
#endif // CUDA compiled by NVRTC
|
|
1322
|
+
|
|
1323
|
+
}
|
|
1324
|
+
|
|
1325
|
+
// emulate atomic double max with atomicCAS()
|
|
1326
|
+
inline CUDA_CALLABLE double atomic_max(double* address, double val)
|
|
1327
|
+
{
|
|
1328
|
+
#if defined(__CUDA_ARCH__)
|
|
1329
|
+
unsigned long long int *address_as_ull = (unsigned long long int*)address;
|
|
1330
|
+
unsigned long long int old = *address_as_ull, assumed;
|
|
1331
|
+
|
|
1332
|
+
while (val > __longlong_as_double(old))
|
|
1333
|
+
{
|
|
1334
|
+
assumed = old;
|
|
1335
|
+
old = atomicCAS(address_as_ull, assumed,
|
|
1336
|
+
__double_as_longlong(val));
|
|
1337
|
+
}
|
|
1338
|
+
|
|
1339
|
+
return __longlong_as_double(old);
|
|
1340
|
+
|
|
1341
|
+
#else
|
|
1342
|
+
double old = *address;
|
|
1343
|
+
*address = max(old, val);
|
|
1344
|
+
return old;
|
|
1345
|
+
#endif
|
|
1346
|
+
}
|
|
1347
|
+
|
|
1348
|
+
// emulate atomic double min with atomicCAS()
|
|
1349
|
+
inline CUDA_CALLABLE double atomic_min(double* address, double val)
|
|
1350
|
+
{
|
|
1351
|
+
#if defined(__CUDA_ARCH__)
|
|
1352
|
+
unsigned long long int *address_as_ull = (unsigned long long int*)address;
|
|
1353
|
+
unsigned long long int old = *address_as_ull, assumed;
|
|
1354
|
+
|
|
1355
|
+
while (val < __longlong_as_double(old))
|
|
1356
|
+
{
|
|
1357
|
+
assumed = old;
|
|
1358
|
+
old = atomicCAS(address_as_ull, assumed,
|
|
1359
|
+
__double_as_longlong(val));
|
|
1360
|
+
}
|
|
1361
|
+
|
|
1362
|
+
return __longlong_as_double(old);
|
|
1363
|
+
|
|
1364
|
+
#else
|
|
1365
|
+
double old = *address;
|
|
1366
|
+
*address = min(old, val);
|
|
1367
|
+
return old;
|
|
1368
|
+
#endif
|
|
1369
|
+
}
|
|
1370
|
+
|
|
1289
1371
|
inline CUDA_CALLABLE int atomic_max(int* address, int val)
|
|
1290
1372
|
{
|
|
1291
1373
|
#if defined(__CUDA_ARCH__)
|
|
@@ -1403,14 +1485,19 @@ inline CUDA_CALLABLE void print(const str s)
|
|
|
1403
1485
|
printf("%s\n", s);
|
|
1404
1486
|
}
|
|
1405
1487
|
|
|
1406
|
-
inline CUDA_CALLABLE void print(
|
|
1488
|
+
inline CUDA_CALLABLE void print(signed char i)
|
|
1407
1489
|
{
|
|
1408
1490
|
printf("%d\n", i);
|
|
1409
1491
|
}
|
|
1410
1492
|
|
|
1411
1493
|
inline CUDA_CALLABLE void print(short i)
|
|
1412
1494
|
{
|
|
1413
|
-
printf("%
|
|
1495
|
+
printf("%d\n", i);
|
|
1496
|
+
}
|
|
1497
|
+
|
|
1498
|
+
inline CUDA_CALLABLE void print(int i)
|
|
1499
|
+
{
|
|
1500
|
+
printf("%d\n", i);
|
|
1414
1501
|
}
|
|
1415
1502
|
|
|
1416
1503
|
inline CUDA_CALLABLE void print(long i)
|
|
@@ -1423,14 +1510,19 @@ inline CUDA_CALLABLE void print(long long i)
|
|
|
1423
1510
|
printf("%lld\n", i);
|
|
1424
1511
|
}
|
|
1425
1512
|
|
|
1426
|
-
inline CUDA_CALLABLE void print(unsigned i)
|
|
1513
|
+
inline CUDA_CALLABLE void print(unsigned char i)
|
|
1427
1514
|
{
|
|
1428
1515
|
printf("%u\n", i);
|
|
1429
1516
|
}
|
|
1430
1517
|
|
|
1431
1518
|
inline CUDA_CALLABLE void print(unsigned short i)
|
|
1432
1519
|
{
|
|
1433
|
-
printf("%
|
|
1520
|
+
printf("%u\n", i);
|
|
1521
|
+
}
|
|
1522
|
+
|
|
1523
|
+
inline CUDA_CALLABLE void print(unsigned int i)
|
|
1524
|
+
{
|
|
1525
|
+
printf("%u\n", i);
|
|
1434
1526
|
}
|
|
1435
1527
|
|
|
1436
1528
|
inline CUDA_CALLABLE void print(unsigned long i)
|
|
@@ -1443,6 +1535,11 @@ inline CUDA_CALLABLE void print(unsigned long long i)
|
|
|
1443
1535
|
printf("%llu\n", i);
|
|
1444
1536
|
}
|
|
1445
1537
|
|
|
1538
|
+
inline CUDA_CALLABLE void print(bool b)
|
|
1539
|
+
{
|
|
1540
|
+
printf(b ? "True\n" : "False\n");
|
|
1541
|
+
}
|
|
1542
|
+
|
|
1446
1543
|
template<unsigned Length, typename Type>
|
|
1447
1544
|
inline CUDA_CALLABLE void print(vec_t<Length, Type> v)
|
|
1448
1545
|
{
|
warp/native/bvh.cu
CHANGED
|
@@ -65,7 +65,7 @@ __global__ void bvh_refit_kernel(int n, const int* __restrict__ parents, int* __
|
|
|
65
65
|
int finished = atomicAdd(&child_count[parent], 1);
|
|
66
66
|
|
|
67
67
|
// if we have are the last thread (such that the parent node is now complete)
|
|
68
|
-
// then update its bounds and move onto the
|
|
68
|
+
// then update its bounds and move onto the next parent in the hierarchy
|
|
69
69
|
if (finished == 1)
|
|
70
70
|
{
|
|
71
71
|
const int left_child = node_lowers[parent].i;
|
|
@@ -273,7 +273,7 @@ __global__ void build_hierarchy(int n, int* root, const int* __restrict__ deltas
|
|
|
273
273
|
}
|
|
274
274
|
|
|
275
275
|
// if we have are the last thread (such that the parent node is now complete)
|
|
276
|
-
// then update its bounds and move onto the
|
|
276
|
+
// then update its bounds and move onto the next parent in the hierarchy
|
|
277
277
|
if (childCount == 1)
|
|
278
278
|
{
|
|
279
279
|
const int left_child = lowers[parent].i;
|
|
@@ -463,7 +463,9 @@ void bvh_create_device(void* context, vec3* lowers, vec3* uppers, int num_items,
|
|
|
463
463
|
bvh_host.num_items = num_items;
|
|
464
464
|
bvh_host.max_nodes = 2*num_items;
|
|
465
465
|
bvh_host.node_lowers = (BVHPackedNodeHalf*)alloc_device(WP_CURRENT_CONTEXT, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
|
|
466
|
+
memset_device(WP_CURRENT_CONTEXT, bvh_host.node_lowers, 0, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
|
|
466
467
|
bvh_host.node_uppers = (BVHPackedNodeHalf*)alloc_device(WP_CURRENT_CONTEXT, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
|
|
468
|
+
memset_device(WP_CURRENT_CONTEXT, bvh_host.node_uppers, 0, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
|
|
467
469
|
bvh_host.node_parents = (int*)alloc_device(WP_CURRENT_CONTEXT, sizeof(int)*bvh_host.max_nodes);
|
|
468
470
|
bvh_host.node_counts = (int*)alloc_device(WP_CURRENT_CONTEXT, sizeof(int)*bvh_host.max_nodes);
|
|
469
471
|
bvh_host.root = (int*)alloc_device(WP_CURRENT_CONTEXT, sizeof(int));
|
warp/native/cuda_util.cpp
CHANGED
|
@@ -82,6 +82,8 @@ static PFN_cuStreamWaitEvent_v3020 pfn_cuStreamWaitEvent;
|
|
|
82
82
|
static PFN_cuStreamGetCtx_v9020 pfn_cuStreamGetCtx;
|
|
83
83
|
static PFN_cuStreamGetCaptureInfo_v11030 pfn_cuStreamGetCaptureInfo;
|
|
84
84
|
static PFN_cuStreamUpdateCaptureDependencies_v11030 pfn_cuStreamUpdateCaptureDependencies;
|
|
85
|
+
static PFN_cuStreamCreateWithPriority_v5050 pfn_cuStreamCreateWithPriority;
|
|
86
|
+
static PFN_cuStreamGetPriority_v5050 pfn_cuStreamGetPriority;
|
|
85
87
|
static PFN_cuEventCreate_v2000 pfn_cuEventCreate;
|
|
86
88
|
static PFN_cuEventDestroy_v4000 pfn_cuEventDestroy;
|
|
87
89
|
static PFN_cuEventRecord_v2000 pfn_cuEventRecord;
|
|
@@ -211,6 +213,8 @@ bool init_cuda_driver()
|
|
|
211
213
|
get_driver_entry_point("cuStreamGetCtx", &(void*&)pfn_cuStreamGetCtx);
|
|
212
214
|
get_driver_entry_point("cuStreamGetCaptureInfo", &(void*&)pfn_cuStreamGetCaptureInfo);
|
|
213
215
|
get_driver_entry_point("cuStreamUpdateCaptureDependencies", &(void*&)pfn_cuStreamUpdateCaptureDependencies);
|
|
216
|
+
get_driver_entry_point("cuStreamCreateWithPriority", &(void*&)pfn_cuStreamCreateWithPriority);
|
|
217
|
+
get_driver_entry_point("cuStreamGetPriority", &(void*&)pfn_cuStreamGetPriority);
|
|
214
218
|
get_driver_entry_point("cuEventCreate", &(void*&)pfn_cuEventCreate);
|
|
215
219
|
get_driver_entry_point("cuEventDestroy", &(void*&)pfn_cuEventDestroy);
|
|
216
220
|
get_driver_entry_point("cuEventRecord", &(void*&)pfn_cuEventRecord);
|
|
@@ -474,6 +478,16 @@ CUresult cuStreamUpdateCaptureDependencies_f(CUstream stream, CUgraphNode *depen
|
|
|
474
478
|
return pfn_cuStreamUpdateCaptureDependencies ? pfn_cuStreamUpdateCaptureDependencies(stream, dependencies, numDependencies, flags) : DRIVER_ENTRY_POINT_ERROR;
|
|
475
479
|
}
|
|
476
480
|
|
|
481
|
+
CUresult cuStreamCreateWithPriority_f(CUstream* phStream, unsigned int flags, int priority)
|
|
482
|
+
{
|
|
483
|
+
return pfn_cuStreamCreateWithPriority ? pfn_cuStreamCreateWithPriority(phStream, flags, priority) : DRIVER_ENTRY_POINT_ERROR;
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
CUresult cuStreamGetPriority_f(CUstream hStream, int* priority)
|
|
487
|
+
{
|
|
488
|
+
return pfn_cuStreamGetPriority ? pfn_cuStreamGetPriority(hStream, priority) : DRIVER_ENTRY_POINT_ERROR;
|
|
489
|
+
}
|
|
490
|
+
|
|
477
491
|
CUresult cuEventCreate_f(CUevent* event, unsigned int flags)
|
|
478
492
|
{
|
|
479
493
|
return pfn_cuEventCreate ? pfn_cuEventCreate(event, flags) : DRIVER_ENTRY_POINT_ERROR;
|
warp/native/cuda_util.h
CHANGED
|
@@ -81,6 +81,8 @@ CUresult cuStreamWaitEvent_f(CUstream stream, CUevent event, unsigned int flags)
|
|
|
81
81
|
CUresult cuStreamGetCtx_f(CUstream stream, CUcontext* pctx);
|
|
82
82
|
CUresult cuStreamGetCaptureInfo_f(CUstream stream, CUstreamCaptureStatus *captureStatus_out, cuuint64_t *id_out, CUgraph *graph_out, const CUgraphNode **dependencies_out, size_t *numDependencies_out);
|
|
83
83
|
CUresult cuStreamUpdateCaptureDependencies_f(CUstream stream, CUgraphNode *dependencies, size_t numDependencies, unsigned int flags);
|
|
84
|
+
CUresult cuStreamCreateWithPriority_f(CUstream* phStream, unsigned int flags, int priority);
|
|
85
|
+
CUresult cuStreamGetPriority_f(CUstream hStream, int* priority);
|
|
84
86
|
CUresult cuEventCreate_f(CUevent* event, unsigned int flags);
|
|
85
87
|
CUresult cuEventDestroy_f(CUevent event);
|
|
86
88
|
CUresult cuEventRecord_f(CUevent event, CUstream stream);
|
warp/native/error.cpp
CHANGED
|
@@ -28,7 +28,8 @@ void set_error_string(const char* fmt, ...)
|
|
|
28
28
|
vsnprintf(g_error_buffer, sizeof(g_error_buffer), fmt, args);
|
|
29
29
|
if (g_error_output_enabled)
|
|
30
30
|
{
|
|
31
|
-
vfprintf(
|
|
31
|
+
// note: we deliberately avoid vfprintf() due to problems with runtime glibc mismatch
|
|
32
|
+
fputs(g_error_buffer, g_error_stream);
|
|
32
33
|
fputc('\n', g_error_stream);
|
|
33
34
|
fflush(g_error_stream);
|
|
34
35
|
}
|
|
@@ -46,7 +47,8 @@ void append_error_string(const char* fmt, ...)
|
|
|
46
47
|
vsnprintf(g_error_buffer + offset, sizeof(g_error_buffer) - offset, fmt, args);
|
|
47
48
|
if (g_error_output_enabled)
|
|
48
49
|
{
|
|
49
|
-
vfprintf(
|
|
50
|
+
// note: we deliberately avoid vfprintf() due to problems with runtime glibc mismatch
|
|
51
|
+
fputs(g_error_buffer + offset, g_error_stream);
|
|
50
52
|
fputc('\n', g_error_stream);
|
|
51
53
|
fflush(g_error_stream);
|
|
52
54
|
}
|
warp/native/exports.h
CHANGED
|
@@ -1041,6 +1041,69 @@ WP_API void builtin_pnoise_uint32_vec4f_int32_int32_int32_int32(uint32 state, ve
|
|
|
1041
1041
|
WP_API void builtin_curlnoise_uint32_vec2f_uint32_float32_float32(uint32 state, vec2f& xy, uint32 octaves, float32 lacunarity, float32 gain, vec2f* ret) { *ret = wp::curlnoise(state, xy, octaves, lacunarity, gain); }
|
|
1042
1042
|
WP_API void builtin_curlnoise_uint32_vec3f_uint32_float32_float32(uint32 state, vec3f& xyz, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyz, octaves, lacunarity, gain); }
|
|
1043
1043
|
WP_API void builtin_curlnoise_uint32_vec4f_uint32_float32_float32(uint32 state, vec4f& xyzt, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyzt, octaves, lacunarity, gain); }
|
|
1044
|
+
WP_API void builtin_assign_vec2h_int32_float16(vec2h& a, int32 i, float16 value, vec2h* ret) { *ret = wp::assign(a, i, value); }
|
|
1045
|
+
WP_API void builtin_assign_vec3h_int32_float16(vec3h& a, int32 i, float16 value, vec3h* ret) { *ret = wp::assign(a, i, value); }
|
|
1046
|
+
WP_API void builtin_assign_vec4h_int32_float16(vec4h& a, int32 i, float16 value, vec4h* ret) { *ret = wp::assign(a, i, value); }
|
|
1047
|
+
WP_API void builtin_assign_spatial_vectorh_int32_float16(spatial_vectorh& a, int32 i, float16 value, spatial_vectorh* ret) { *ret = wp::assign(a, i, value); }
|
|
1048
|
+
WP_API void builtin_assign_vec2f_int32_float32(vec2f& a, int32 i, float32 value, vec2f* ret) { *ret = wp::assign(a, i, value); }
|
|
1049
|
+
WP_API void builtin_assign_vec3f_int32_float32(vec3f& a, int32 i, float32 value, vec3f* ret) { *ret = wp::assign(a, i, value); }
|
|
1050
|
+
WP_API void builtin_assign_vec4f_int32_float32(vec4f& a, int32 i, float32 value, vec4f* ret) { *ret = wp::assign(a, i, value); }
|
|
1051
|
+
WP_API void builtin_assign_spatial_vectorf_int32_float32(spatial_vectorf& a, int32 i, float32 value, spatial_vectorf* ret) { *ret = wp::assign(a, i, value); }
|
|
1052
|
+
WP_API void builtin_assign_vec2d_int32_float64(vec2d& a, int32 i, float64 value, vec2d* ret) { *ret = wp::assign(a, i, value); }
|
|
1053
|
+
WP_API void builtin_assign_vec3d_int32_float64(vec3d& a, int32 i, float64 value, vec3d* ret) { *ret = wp::assign(a, i, value); }
|
|
1054
|
+
WP_API void builtin_assign_vec4d_int32_float64(vec4d& a, int32 i, float64 value, vec4d* ret) { *ret = wp::assign(a, i, value); }
|
|
1055
|
+
WP_API void builtin_assign_spatial_vectord_int32_float64(spatial_vectord& a, int32 i, float64 value, spatial_vectord* ret) { *ret = wp::assign(a, i, value); }
|
|
1056
|
+
WP_API void builtin_assign_vec2s_int32_int16(vec2s& a, int32 i, int16 value, vec2s* ret) { *ret = wp::assign(a, i, value); }
|
|
1057
|
+
WP_API void builtin_assign_vec3s_int32_int16(vec3s& a, int32 i, int16 value, vec3s* ret) { *ret = wp::assign(a, i, value); }
|
|
1058
|
+
WP_API void builtin_assign_vec4s_int32_int16(vec4s& a, int32 i, int16 value, vec4s* ret) { *ret = wp::assign(a, i, value); }
|
|
1059
|
+
WP_API void builtin_assign_vec2i_int32_int32(vec2i& a, int32 i, int32 value, vec2i* ret) { *ret = wp::assign(a, i, value); }
|
|
1060
|
+
WP_API void builtin_assign_vec3i_int32_int32(vec3i& a, int32 i, int32 value, vec3i* ret) { *ret = wp::assign(a, i, value); }
|
|
1061
|
+
WP_API void builtin_assign_vec4i_int32_int32(vec4i& a, int32 i, int32 value, vec4i* ret) { *ret = wp::assign(a, i, value); }
|
|
1062
|
+
WP_API void builtin_assign_vec2l_int32_int64(vec2l& a, int32 i, int64 value, vec2l* ret) { *ret = wp::assign(a, i, value); }
|
|
1063
|
+
WP_API void builtin_assign_vec3l_int32_int64(vec3l& a, int32 i, int64 value, vec3l* ret) { *ret = wp::assign(a, i, value); }
|
|
1064
|
+
WP_API void builtin_assign_vec4l_int32_int64(vec4l& a, int32 i, int64 value, vec4l* ret) { *ret = wp::assign(a, i, value); }
|
|
1065
|
+
WP_API void builtin_assign_vec2b_int32_int8(vec2b& a, int32 i, int8 value, vec2b* ret) { *ret = wp::assign(a, i, value); }
|
|
1066
|
+
WP_API void builtin_assign_vec3b_int32_int8(vec3b& a, int32 i, int8 value, vec3b* ret) { *ret = wp::assign(a, i, value); }
|
|
1067
|
+
WP_API void builtin_assign_vec4b_int32_int8(vec4b& a, int32 i, int8 value, vec4b* ret) { *ret = wp::assign(a, i, value); }
|
|
1068
|
+
WP_API void builtin_assign_vec2us_int32_uint16(vec2us& a, int32 i, uint16 value, vec2us* ret) { *ret = wp::assign(a, i, value); }
|
|
1069
|
+
WP_API void builtin_assign_vec3us_int32_uint16(vec3us& a, int32 i, uint16 value, vec3us* ret) { *ret = wp::assign(a, i, value); }
|
|
1070
|
+
WP_API void builtin_assign_vec4us_int32_uint16(vec4us& a, int32 i, uint16 value, vec4us* ret) { *ret = wp::assign(a, i, value); }
|
|
1071
|
+
WP_API void builtin_assign_vec2ui_int32_uint32(vec2ui& a, int32 i, uint32 value, vec2ui* ret) { *ret = wp::assign(a, i, value); }
|
|
1072
|
+
WP_API void builtin_assign_vec3ui_int32_uint32(vec3ui& a, int32 i, uint32 value, vec3ui* ret) { *ret = wp::assign(a, i, value); }
|
|
1073
|
+
WP_API void builtin_assign_vec4ui_int32_uint32(vec4ui& a, int32 i, uint32 value, vec4ui* ret) { *ret = wp::assign(a, i, value); }
|
|
1074
|
+
WP_API void builtin_assign_vec2ul_int32_uint64(vec2ul& a, int32 i, uint64 value, vec2ul* ret) { *ret = wp::assign(a, i, value); }
|
|
1075
|
+
WP_API void builtin_assign_vec3ul_int32_uint64(vec3ul& a, int32 i, uint64 value, vec3ul* ret) { *ret = wp::assign(a, i, value); }
|
|
1076
|
+
WP_API void builtin_assign_vec4ul_int32_uint64(vec4ul& a, int32 i, uint64 value, vec4ul* ret) { *ret = wp::assign(a, i, value); }
|
|
1077
|
+
WP_API void builtin_assign_vec2ub_int32_uint8(vec2ub& a, int32 i, uint8 value, vec2ub* ret) { *ret = wp::assign(a, i, value); }
|
|
1078
|
+
WP_API void builtin_assign_vec3ub_int32_uint8(vec3ub& a, int32 i, uint8 value, vec3ub* ret) { *ret = wp::assign(a, i, value); }
|
|
1079
|
+
WP_API void builtin_assign_vec4ub_int32_uint8(vec4ub& a, int32 i, uint8 value, vec4ub* ret) { *ret = wp::assign(a, i, value); }
|
|
1080
|
+
WP_API void builtin_assign_quath_int32_float16(quath& a, int32 i, float16 value, quath* ret) { *ret = wp::assign(a, i, value); }
|
|
1081
|
+
WP_API void builtin_assign_quatf_int32_float32(quatf& a, int32 i, float32 value, quatf* ret) { *ret = wp::assign(a, i, value); }
|
|
1082
|
+
WP_API void builtin_assign_quatd_int32_float64(quatd& a, int32 i, float64 value, quatd* ret) { *ret = wp::assign(a, i, value); }
|
|
1083
|
+
WP_API void builtin_assign_mat22h_int32_int32_float16(mat22h& a, int32 i, int32 j, float16 value, mat22h* ret) { *ret = wp::assign(a, i, j, value); }
|
|
1084
|
+
WP_API void builtin_assign_mat33h_int32_int32_float16(mat33h& a, int32 i, int32 j, float16 value, mat33h* ret) { *ret = wp::assign(a, i, j, value); }
|
|
1085
|
+
WP_API void builtin_assign_mat44h_int32_int32_float16(mat44h& a, int32 i, int32 j, float16 value, mat44h* ret) { *ret = wp::assign(a, i, j, value); }
|
|
1086
|
+
WP_API void builtin_assign_spatial_matrixh_int32_int32_float16(spatial_matrixh& a, int32 i, int32 j, float16 value, spatial_matrixh* ret) { *ret = wp::assign(a, i, j, value); }
|
|
1087
|
+
WP_API void builtin_assign_mat22f_int32_int32_float32(mat22f& a, int32 i, int32 j, float32 value, mat22f* ret) { *ret = wp::assign(a, i, j, value); }
|
|
1088
|
+
WP_API void builtin_assign_mat33f_int32_int32_float32(mat33f& a, int32 i, int32 j, float32 value, mat33f* ret) { *ret = wp::assign(a, i, j, value); }
|
|
1089
|
+
WP_API void builtin_assign_mat44f_int32_int32_float32(mat44f& a, int32 i, int32 j, float32 value, mat44f* ret) { *ret = wp::assign(a, i, j, value); }
|
|
1090
|
+
WP_API void builtin_assign_spatial_matrixf_int32_int32_float32(spatial_matrixf& a, int32 i, int32 j, float32 value, spatial_matrixf* ret) { *ret = wp::assign(a, i, j, value); }
|
|
1091
|
+
WP_API void builtin_assign_mat22d_int32_int32_float64(mat22d& a, int32 i, int32 j, float64 value, mat22d* ret) { *ret = wp::assign(a, i, j, value); }
|
|
1092
|
+
WP_API void builtin_assign_mat33d_int32_int32_float64(mat33d& a, int32 i, int32 j, float64 value, mat33d* ret) { *ret = wp::assign(a, i, j, value); }
|
|
1093
|
+
WP_API void builtin_assign_mat44d_int32_int32_float64(mat44d& a, int32 i, int32 j, float64 value, mat44d* ret) { *ret = wp::assign(a, i, j, value); }
|
|
1094
|
+
WP_API void builtin_assign_spatial_matrixd_int32_int32_float64(spatial_matrixd& a, int32 i, int32 j, float64 value, spatial_matrixd* ret) { *ret = wp::assign(a, i, j, value); }
|
|
1095
|
+
WP_API void builtin_assign_mat22h_int32_vec2h(mat22h& a, int32 i, vec2h& value, mat22h* ret) { *ret = wp::assign(a, i, value); }
|
|
1096
|
+
WP_API void builtin_assign_mat33h_int32_vec3h(mat33h& a, int32 i, vec3h& value, mat33h* ret) { *ret = wp::assign(a, i, value); }
|
|
1097
|
+
WP_API void builtin_assign_mat44h_int32_vec4h(mat44h& a, int32 i, vec4h& value, mat44h* ret) { *ret = wp::assign(a, i, value); }
|
|
1098
|
+
WP_API void builtin_assign_spatial_matrixh_int32_spatial_vectorh(spatial_matrixh& a, int32 i, spatial_vectorh& value, spatial_matrixh* ret) { *ret = wp::assign(a, i, value); }
|
|
1099
|
+
WP_API void builtin_assign_mat22f_int32_vec2f(mat22f& a, int32 i, vec2f& value, mat22f* ret) { *ret = wp::assign(a, i, value); }
|
|
1100
|
+
WP_API void builtin_assign_mat33f_int32_vec3f(mat33f& a, int32 i, vec3f& value, mat33f* ret) { *ret = wp::assign(a, i, value); }
|
|
1101
|
+
WP_API void builtin_assign_mat44f_int32_vec4f(mat44f& a, int32 i, vec4f& value, mat44f* ret) { *ret = wp::assign(a, i, value); }
|
|
1102
|
+
WP_API void builtin_assign_spatial_matrixf_int32_spatial_vectorf(spatial_matrixf& a, int32 i, spatial_vectorf& value, spatial_matrixf* ret) { *ret = wp::assign(a, i, value); }
|
|
1103
|
+
WP_API void builtin_assign_mat22d_int32_vec2d(mat22d& a, int32 i, vec2d& value, mat22d* ret) { *ret = wp::assign(a, i, value); }
|
|
1104
|
+
WP_API void builtin_assign_mat33d_int32_vec3d(mat33d& a, int32 i, vec3d& value, mat33d* ret) { *ret = wp::assign(a, i, value); }
|
|
1105
|
+
WP_API void builtin_assign_mat44d_int32_vec4d(mat44d& a, int32 i, vec4d& value, mat44d* ret) { *ret = wp::assign(a, i, value); }
|
|
1106
|
+
WP_API void builtin_assign_spatial_matrixd_int32_spatial_vectord(spatial_matrixd& a, int32 i, spatial_vectord& value, spatial_matrixd* ret) { *ret = wp::assign(a, i, value); }
|
|
1044
1107
|
WP_API void builtin_extract_vec2h_int32(vec2h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
|
|
1045
1108
|
WP_API void builtin_extract_vec3h_int32(vec3h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
|
|
1046
1109
|
WP_API void builtin_extract_vec4h_int32(vec4h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
|
|
@@ -1494,6 +1557,42 @@ WP_API void builtin_mod_uint16_uint16(uint16 a, uint16 b, uint16* ret) { *ret =
|
|
|
1494
1557
|
// WP_API export: writes wp::mod(a, b) to *ret for two uint32 operands passed by value.
WP_API void builtin_mod_uint32_uint32(uint32 a, uint32 b, uint32* ret) { *ret = wp::mod(a, b); }
|
|
1495
1558
|
// WP_API export: writes wp::mod(a, b) to *ret for two uint64 operands passed by value.
WP_API void builtin_mod_uint64_uint64(uint64 a, uint64 b, uint64* ret) { *ret = wp::mod(a, b); }
|
|
1496
1559
|
// WP_API export: writes wp::mod(a, b) to *ret for two uint8 operands passed by value.
WP_API void builtin_mod_uint8_uint8(uint8 a, uint8 b, uint8* ret) { *ret = wp::mod(a, b); }
|
|
1560
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec2h operands.
WP_API void builtin_mod_vec2h_vec2h(vec2h& a, vec2h& b, vec2h* ret) { *ret = wp::mod(a, b); }
|
|
1561
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec3h operands.
WP_API void builtin_mod_vec3h_vec3h(vec3h& a, vec3h& b, vec3h* ret) { *ret = wp::mod(a, b); }
|
|
1562
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec4h operands.
WP_API void builtin_mod_vec4h_vec4h(vec4h& a, vec4h& b, vec4h* ret) { *ret = wp::mod(a, b); }
|
|
1563
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two spatial_vectorh operands.
WP_API void builtin_mod_spatial_vectorh_spatial_vectorh(spatial_vectorh& a, spatial_vectorh& b, spatial_vectorh* ret) { *ret = wp::mod(a, b); }
|
|
1564
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec2f operands.
WP_API void builtin_mod_vec2f_vec2f(vec2f& a, vec2f& b, vec2f* ret) { *ret = wp::mod(a, b); }
|
|
1565
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec3f operands.
WP_API void builtin_mod_vec3f_vec3f(vec3f& a, vec3f& b, vec3f* ret) { *ret = wp::mod(a, b); }
|
|
1566
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec4f operands.
WP_API void builtin_mod_vec4f_vec4f(vec4f& a, vec4f& b, vec4f* ret) { *ret = wp::mod(a, b); }
|
|
1567
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two spatial_vectorf operands.
WP_API void builtin_mod_spatial_vectorf_spatial_vectorf(spatial_vectorf& a, spatial_vectorf& b, spatial_vectorf* ret) { *ret = wp::mod(a, b); }
|
|
1568
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec2d operands.
WP_API void builtin_mod_vec2d_vec2d(vec2d& a, vec2d& b, vec2d* ret) { *ret = wp::mod(a, b); }
|
|
1569
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec3d operands.
WP_API void builtin_mod_vec3d_vec3d(vec3d& a, vec3d& b, vec3d* ret) { *ret = wp::mod(a, b); }
|
|
1570
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec4d operands.
WP_API void builtin_mod_vec4d_vec4d(vec4d& a, vec4d& b, vec4d* ret) { *ret = wp::mod(a, b); }
|
|
1571
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two spatial_vectord operands.
WP_API void builtin_mod_spatial_vectord_spatial_vectord(spatial_vectord& a, spatial_vectord& b, spatial_vectord* ret) { *ret = wp::mod(a, b); }
|
|
1572
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec2s operands.
WP_API void builtin_mod_vec2s_vec2s(vec2s& a, vec2s& b, vec2s* ret) { *ret = wp::mod(a, b); }
|
|
1573
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec3s operands.
WP_API void builtin_mod_vec3s_vec3s(vec3s& a, vec3s& b, vec3s* ret) { *ret = wp::mod(a, b); }
|
|
1574
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec4s operands.
WP_API void builtin_mod_vec4s_vec4s(vec4s& a, vec4s& b, vec4s* ret) { *ret = wp::mod(a, b); }
|
|
1575
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec2i operands.
WP_API void builtin_mod_vec2i_vec2i(vec2i& a, vec2i& b, vec2i* ret) { *ret = wp::mod(a, b); }
|
|
1576
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec3i operands.
WP_API void builtin_mod_vec3i_vec3i(vec3i& a, vec3i& b, vec3i* ret) { *ret = wp::mod(a, b); }
|
|
1577
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec4i operands.
WP_API void builtin_mod_vec4i_vec4i(vec4i& a, vec4i& b, vec4i* ret) { *ret = wp::mod(a, b); }
|
|
1578
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec2l operands.
WP_API void builtin_mod_vec2l_vec2l(vec2l& a, vec2l& b, vec2l* ret) { *ret = wp::mod(a, b); }
|
|
1579
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec3l operands.
WP_API void builtin_mod_vec3l_vec3l(vec3l& a, vec3l& b, vec3l* ret) { *ret = wp::mod(a, b); }
|
|
1580
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec4l operands.
WP_API void builtin_mod_vec4l_vec4l(vec4l& a, vec4l& b, vec4l* ret) { *ret = wp::mod(a, b); }
|
|
1581
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec2b operands.
WP_API void builtin_mod_vec2b_vec2b(vec2b& a, vec2b& b, vec2b* ret) { *ret = wp::mod(a, b); }
|
|
1582
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec3b operands.
WP_API void builtin_mod_vec3b_vec3b(vec3b& a, vec3b& b, vec3b* ret) { *ret = wp::mod(a, b); }
|
|
1583
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec4b operands.
WP_API void builtin_mod_vec4b_vec4b(vec4b& a, vec4b& b, vec4b* ret) { *ret = wp::mod(a, b); }
|
|
1584
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec2us operands.
WP_API void builtin_mod_vec2us_vec2us(vec2us& a, vec2us& b, vec2us* ret) { *ret = wp::mod(a, b); }
|
|
1585
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec3us operands.
WP_API void builtin_mod_vec3us_vec3us(vec3us& a, vec3us& b, vec3us* ret) { *ret = wp::mod(a, b); }
|
|
1586
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec4us operands.
WP_API void builtin_mod_vec4us_vec4us(vec4us& a, vec4us& b, vec4us* ret) { *ret = wp::mod(a, b); }
|
|
1587
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec2ui operands.
WP_API void builtin_mod_vec2ui_vec2ui(vec2ui& a, vec2ui& b, vec2ui* ret) { *ret = wp::mod(a, b); }
|
|
1588
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec3ui operands.
WP_API void builtin_mod_vec3ui_vec3ui(vec3ui& a, vec3ui& b, vec3ui* ret) { *ret = wp::mod(a, b); }
|
|
1589
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec4ui operands.
WP_API void builtin_mod_vec4ui_vec4ui(vec4ui& a, vec4ui& b, vec4ui* ret) { *ret = wp::mod(a, b); }
|
|
1590
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec2ul operands.
WP_API void builtin_mod_vec2ul_vec2ul(vec2ul& a, vec2ul& b, vec2ul* ret) { *ret = wp::mod(a, b); }
|
|
1591
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec3ul operands.
WP_API void builtin_mod_vec3ul_vec3ul(vec3ul& a, vec3ul& b, vec3ul* ret) { *ret = wp::mod(a, b); }
|
|
1592
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec4ul operands.
WP_API void builtin_mod_vec4ul_vec4ul(vec4ul& a, vec4ul& b, vec4ul* ret) { *ret = wp::mod(a, b); }
|
|
1593
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec2ub operands.
WP_API void builtin_mod_vec2ub_vec2ub(vec2ub& a, vec2ub& b, vec2ub* ret) { *ret = wp::mod(a, b); }
|
|
1594
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec3ub operands.
WP_API void builtin_mod_vec3ub_vec3ub(vec3ub& a, vec3ub& b, vec3ub* ret) { *ret = wp::mod(a, b); }
|
|
1595
|
+
// WP_API export: writes wp::mod(a, b) to *ret for two vec4ub operands.
WP_API void builtin_mod_vec4ub_vec4ub(vec4ub& a, vec4ub& b, vec4ub* ret) { *ret = wp::mod(a, b); }
|
|
1497
1596
|
// WP_API export: writes wp::div(a, b) to *ret for two float16 operands passed by value.
WP_API void builtin_div_float16_float16(float16 a, float16 b, float16* ret) { *ret = wp::div(a, b); }
|
|
1498
1597
|
// WP_API export: writes wp::div(a, b) to *ret for two float32 operands passed by value.
WP_API void builtin_div_float32_float32(float32 a, float32 b, float32* ret) { *ret = wp::div(a, b); }
|
|
1499
1598
|
// WP_API export: writes wp::div(a, b) to *ret for two float64 operands passed by value.
WP_API void builtin_div_float64_float64(float64 a, float64 b, float64* ret) { *ret = wp::div(a, b); }
|
warp/native/mat.h
CHANGED
|
@@ -387,6 +387,103 @@ inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row, int
|
|
|
387
387
|
// nop
|
|
388
388
|
}
|
|
389
389
|
|
|
390
|
+
|
|
391
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
392
|
+
inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
|
|
393
|
+
{
|
|
394
|
+
#ifndef NDEBUG
|
|
395
|
+
if (row < 0 || row >= Rows)
|
|
396
|
+
{
|
|
397
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
398
|
+
assert(0);
|
|
399
|
+
}
|
|
400
|
+
if (col < 0 || col >= Cols)
|
|
401
|
+
{
|
|
402
|
+
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
403
|
+
assert(0);
|
|
404
|
+
}
|
|
405
|
+
#endif
|
|
406
|
+
|
|
407
|
+
mat_t<Rows,Cols,Type> ret(m);
|
|
408
|
+
ret.data[row][col] = value;
|
|
409
|
+
return ret;
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
414
|
+
inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
|
|
415
|
+
{
|
|
416
|
+
#ifndef NDEBUG
|
|
417
|
+
if (row < 0 || row >= Rows)
|
|
418
|
+
{
|
|
419
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
420
|
+
assert(0);
|
|
421
|
+
}
|
|
422
|
+
#endif
|
|
423
|
+
|
|
424
|
+
mat_t<Rows,Cols,Type> ret(m);
|
|
425
|
+
for(unsigned i=0; i < Cols; ++i)
|
|
426
|
+
{
|
|
427
|
+
ret.data[row][i] = value[i];
|
|
428
|
+
}
|
|
429
|
+
return ret;
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
434
|
+
inline CUDA_CALLABLE void adj_assign(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
|
|
435
|
+
mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
|
|
436
|
+
{
|
|
437
|
+
#ifndef NDEBUG
|
|
438
|
+
if (row < 0 || row >= Rows)
|
|
439
|
+
{
|
|
440
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
441
|
+
assert(0);
|
|
442
|
+
}
|
|
443
|
+
if (col < 0 || col >= Cols)
|
|
444
|
+
{
|
|
445
|
+
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
446
|
+
assert(0);
|
|
447
|
+
}
|
|
448
|
+
#endif
|
|
449
|
+
|
|
450
|
+
adj_value += adj_ret.data[row][col];
|
|
451
|
+
for(unsigned i=0; i < Rows; ++i)
|
|
452
|
+
{
|
|
453
|
+
for(unsigned j=0; j < Cols; ++j)
|
|
454
|
+
{
|
|
455
|
+
if(i != row || j != col)
|
|
456
|
+
adj_m.data[i][j] += adj_ret.data[i][j];
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
463
|
+
inline CUDA_CALLABLE void adj_assign(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
|
|
464
|
+
mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
|
|
465
|
+
{
|
|
466
|
+
#ifndef NDEBUG
|
|
467
|
+
if (row < 0 || row >= Rows)
|
|
468
|
+
{
|
|
469
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
470
|
+
assert(0);
|
|
471
|
+
}
|
|
472
|
+
#endif
|
|
473
|
+
|
|
474
|
+
for(unsigned i=0; i < Rows; ++i)
|
|
475
|
+
{
|
|
476
|
+
for(unsigned j=0; j < Cols; ++j)
|
|
477
|
+
{
|
|
478
|
+
if (i==row)
|
|
479
|
+
adj_value[j] += adj_ret.data[i][j];
|
|
480
|
+
else
|
|
481
|
+
adj_m.data[i][j] += adj_ret.data[i][j];
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
|
|
390
487
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
391
488
|
inline bool CUDA_CALLABLE isfinite(const mat_t<Rows,Cols,Type>& m)
|
|
392
489
|
{
|
warp/native/mesh.cpp
CHANGED
|
@@ -36,6 +36,16 @@ bool mesh_get_descriptor(uint64_t id, Mesh& mesh)
|
|
|
36
36
|
return true;
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
+
bool mesh_set_descriptor(uint64_t id, const Mesh& mesh)
|
|
40
|
+
{
|
|
41
|
+
const auto& iter = g_mesh_descriptors.find(id);
|
|
42
|
+
if (iter == g_mesh_descriptors.end())
|
|
43
|
+
return false;
|
|
44
|
+
else
|
|
45
|
+
iter->second = mesh;
|
|
46
|
+
return true;
|
|
47
|
+
}
|
|
48
|
+
|
|
39
49
|
void mesh_add_descriptor(uint64_t id, const Mesh& mesh)
|
|
40
50
|
{
|
|
41
51
|
g_mesh_descriptors[id] = mesh;
|
|
@@ -191,6 +201,30 @@ void mesh_refit_host(uint64_t id)
|
|
|
191
201
|
}
|
|
192
202
|
}
|
|
193
203
|
|
|
204
|
+
void mesh_set_points_host(uint64_t id, wp::array_t<wp::vec3> points)
|
|
205
|
+
{
|
|
206
|
+
Mesh* m = (Mesh*)(id);
|
|
207
|
+
if (points.ndim != 1 || points.shape[0] != m->points.shape[0])
|
|
208
|
+
{
|
|
209
|
+
fprintf(stderr, "The new points input for mesh_set_points_host does not match the shape of the original points!\n");
|
|
210
|
+
return;
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
m->points = points;
|
|
214
|
+
|
|
215
|
+
mesh_refit_host(id);
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
void mesh_set_velocities_host(uint64_t id, wp::array_t<wp::vec3> velocities)
|
|
219
|
+
{
|
|
220
|
+
Mesh* m = (Mesh*)(id);
|
|
221
|
+
if (velocities.ndim != 1 || velocities.shape[0] != m->velocities.shape[0])
|
|
222
|
+
{
|
|
223
|
+
fprintf(stderr, "The new velocities input for mesh_set_velocities_host does not match the shape of the original velocities!\n");
|
|
224
|
+
return;
|
|
225
|
+
}
|
|
226
|
+
m->velocities = velocities;
|
|
227
|
+
}
|
|
194
228
|
|
|
195
229
|
// stubs for non-CUDA platforms
|
|
196
230
|
#if !WP_ENABLE_CUDA
|
|
@@ -199,6 +233,8 @@ void mesh_refit_host(uint64_t id)
|
|
|
199
233
|
// Stub built when WP_ENABLE_CUDA is off: ignores all arguments and returns 0 as the mesh id.
WP_API uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> tris, int num_points, int num_tris, int support_winding_number) { return 0; }
|
|
200
234
|
// Stub built when WP_ENABLE_CUDA is off: does nothing.
WP_API void mesh_destroy_device(uint64_t id) {}
|
|
201
235
|
// Stub built when WP_ENABLE_CUDA is off: does nothing.
WP_API void mesh_refit_device(uint64_t id) {}
|
|
236
|
+
// Stub built when WP_ENABLE_CUDA is off: does nothing.
WP_API void mesh_set_points_device(uint64_t id, wp::array_t<wp::vec3> points) {}
|
|
237
|
+
// Stub built when WP_ENABLE_CUDA is off: does nothing.
WP_API void mesh_set_velocities_device(uint64_t id, wp::array_t<wp::vec3> velocities) {}
|
|
202
238
|
|
|
203
239
|
|
|
204
240
|
#endif // !WP_ENABLE_CUDA
|