warp-lang 0.11.0-py3-none-manylinux2014_x86_64.whl → 1.0.0-py3-none-manylinux2014_x86_64.whl
- warp/__init__.py +8 -0
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +7 -6
- warp/build_dll.py +70 -79
- warp/builtins.py +10 -6
- warp/codegen.py +51 -19
- warp/config.py +7 -8
- warp/constants.py +3 -0
- warp/context.py +948 -245
- warp/dlpack.py +198 -113
- warp/examples/assets/bunny.usd +0 -0
- warp/examples/assets/cartpole.urdf +110 -0
- warp/examples/assets/crazyflie.usd +0 -0
- warp/examples/assets/cube.usda +42 -0
- warp/examples/assets/nv_ant.xml +92 -0
- warp/examples/assets/nv_humanoid.xml +183 -0
- warp/examples/assets/quadruped.urdf +268 -0
- warp/examples/assets/rocks.nvdb +0 -0
- warp/examples/assets/rocks.usd +0 -0
- warp/examples/assets/sphere.usda +56 -0
- warp/examples/assets/torus.usda +105 -0
- warp/examples/benchmarks/benchmark_api.py +383 -0
- warp/examples/benchmarks/benchmark_cloth.py +279 -0
- warp/examples/benchmarks/benchmark_cloth_cupy.py +88 -0
- warp/examples/benchmarks/benchmark_cloth_jax.py +100 -0
- warp/examples/benchmarks/benchmark_cloth_numba.py +142 -0
- warp/examples/benchmarks/benchmark_cloth_numpy.py +77 -0
- warp/examples/benchmarks/benchmark_cloth_pytorch.py +86 -0
- warp/examples/benchmarks/benchmark_cloth_taichi.py +112 -0
- warp/examples/benchmarks/benchmark_cloth_warp.py +146 -0
- warp/examples/benchmarks/benchmark_launches.py +295 -0
- warp/examples/core/example_dem.py +221 -0
- warp/examples/core/example_fluid.py +267 -0
- warp/examples/core/example_graph_capture.py +129 -0
- warp/examples/core/example_marching_cubes.py +177 -0
- warp/examples/core/example_mesh.py +154 -0
- warp/examples/core/example_mesh_intersect.py +193 -0
- warp/examples/core/example_nvdb.py +169 -0
- warp/examples/core/example_raycast.py +89 -0
- warp/examples/core/example_raymarch.py +178 -0
- warp/examples/core/example_render_opengl.py +141 -0
- warp/examples/core/example_sph.py +389 -0
- warp/examples/core/example_torch.py +181 -0
- warp/examples/core/example_wave.py +249 -0
- warp/examples/fem/bsr_utils.py +380 -0
- warp/examples/fem/example_apic_fluid.py +391 -0
- warp/examples/fem/example_convection_diffusion.py +168 -0
- warp/examples/fem/example_convection_diffusion_dg.py +209 -0
- warp/examples/fem/example_convection_diffusion_dg0.py +194 -0
- warp/examples/fem/example_deformed_geometry.py +159 -0
- warp/examples/fem/example_diffusion.py +173 -0
- warp/examples/fem/example_diffusion_3d.py +152 -0
- warp/examples/fem/example_diffusion_mgpu.py +214 -0
- warp/examples/fem/example_mixed_elasticity.py +222 -0
- warp/examples/fem/example_navier_stokes.py +243 -0
- warp/examples/fem/example_stokes.py +192 -0
- warp/examples/fem/example_stokes_transfer.py +249 -0
- warp/examples/fem/mesh_utils.py +109 -0
- warp/examples/fem/plot_utils.py +287 -0
- warp/examples/optim/example_bounce.py +248 -0
- warp/examples/optim/example_cloth_throw.py +210 -0
- warp/examples/optim/example_diffray.py +535 -0
- warp/examples/optim/example_drone.py +850 -0
- warp/examples/optim/example_inverse_kinematics.py +169 -0
- warp/examples/optim/example_inverse_kinematics_torch.py +170 -0
- warp/examples/optim/example_spring_cage.py +234 -0
- warp/examples/optim/example_trajectory.py +201 -0
- warp/examples/sim/example_cartpole.py +128 -0
- warp/examples/sim/example_cloth.py +184 -0
- warp/examples/sim/example_granular.py +113 -0
- warp/examples/sim/example_granular_collision_sdf.py +185 -0
- warp/examples/sim/example_jacobian_ik.py +213 -0
- warp/examples/sim/example_particle_chain.py +106 -0
- warp/examples/sim/example_quadruped.py +179 -0
- warp/examples/sim/example_rigid_chain.py +191 -0
- warp/examples/sim/example_rigid_contact.py +176 -0
- warp/examples/sim/example_rigid_force.py +126 -0
- warp/examples/sim/example_rigid_gyroscopic.py +97 -0
- warp/examples/sim/example_rigid_soft_contact.py +124 -0
- warp/examples/sim/example_soft_body.py +178 -0
- warp/fabric.py +29 -20
- warp/fem/cache.py +0 -1
- warp/fem/dirichlet.py +0 -2
- warp/fem/integrate.py +0 -1
- warp/jax.py +45 -0
- warp/jax_experimental.py +339 -0
- warp/native/builtin.h +12 -0
- warp/native/bvh.cu +18 -18
- warp/native/clang/clang.cpp +8 -3
- warp/native/cuda_util.cpp +94 -5
- warp/native/cuda_util.h +35 -6
- warp/native/cutlass_gemm.cpp +1 -1
- warp/native/cutlass_gemm.cu +4 -1
- warp/native/error.cpp +66 -0
- warp/native/error.h +27 -0
- warp/native/mesh.cu +2 -2
- warp/native/reduce.cu +4 -4
- warp/native/runlength_encode.cu +2 -2
- warp/native/scan.cu +2 -2
- warp/native/sparse.cu +0 -1
- warp/native/temp_buffer.h +2 -2
- warp/native/warp.cpp +95 -60
- warp/native/warp.cu +1053 -218
- warp/native/warp.h +49 -32
- warp/optim/linear.py +33 -16
- warp/render/render_opengl.py +202 -101
- warp/render/render_usd.py +82 -40
- warp/sim/__init__.py +13 -4
- warp/sim/articulation.py +4 -5
- warp/sim/collide.py +320 -175
- warp/sim/import_mjcf.py +25 -30
- warp/sim/import_urdf.py +94 -63
- warp/sim/import_usd.py +51 -36
- warp/sim/inertia.py +3 -2
- warp/sim/integrator.py +233 -0
- warp/sim/integrator_euler.py +447 -469
- warp/sim/integrator_featherstone.py +1991 -0
- warp/sim/integrator_xpbd.py +1420 -640
- warp/sim/model.py +765 -487
- warp/sim/particles.py +2 -1
- warp/sim/render.py +35 -13
- warp/sim/utils.py +222 -11
- warp/stubs.py +8 -0
- warp/tape.py +16 -1
- warp/tests/aux_test_grad_customs.py +23 -0
- warp/tests/test_array.py +190 -1
- warp/tests/test_async.py +656 -0
- warp/tests/test_bool.py +50 -0
- warp/tests/test_dlpack.py +164 -11
- warp/tests/test_examples.py +166 -74
- warp/tests/test_fem.py +8 -1
- warp/tests/test_generics.py +15 -5
- warp/tests/test_grad.py +1 -1
- warp/tests/test_grad_customs.py +172 -12
- warp/tests/test_jax.py +254 -0
- warp/tests/test_large.py +29 -6
- warp/tests/test_launch.py +25 -0
- warp/tests/test_linear_solvers.py +20 -3
- warp/tests/test_matmul.py +61 -16
- warp/tests/test_matmul_lite.py +13 -13
- warp/tests/test_mempool.py +186 -0
- warp/tests/test_multigpu.py +3 -0
- warp/tests/test_options.py +16 -2
- warp/tests/test_peer.py +137 -0
- warp/tests/test_print.py +3 -1
- warp/tests/test_quat.py +23 -0
- warp/tests/test_sim_kinematics.py +97 -0
- warp/tests/test_snippet.py +126 -3
- warp/tests/test_streams.py +108 -79
- warp/tests/test_torch.py +16 -8
- warp/tests/test_utils.py +32 -27
- warp/tests/test_verify_fp.py +65 -0
- warp/tests/test_volume.py +1 -1
- warp/tests/unittest_serial.py +2 -0
- warp/tests/unittest_suites.py +12 -0
- warp/tests/unittest_utils.py +14 -7
- warp/thirdparty/unittest_parallel.py +15 -3
- warp/torch.py +10 -8
- warp/types.py +363 -246
- warp/utils.py +143 -19
- warp_lang-1.0.0.dist-info/LICENSE.md +126 -0
- warp_lang-1.0.0.dist-info/METADATA +394 -0
- {warp_lang-0.11.0.dist-info → warp_lang-1.0.0.dist-info}/RECORD +167 -86
- warp/sim/optimizer.py +0 -138
- warp_lang-0.11.0.dist-info/LICENSE.md +0 -36
- warp_lang-0.11.0.dist-info/METADATA +0 -238
- /warp/tests/{walkthough_debug.py → walkthrough_debug.py} +0 -0
- {warp_lang-0.11.0.dist-info → warp_lang-1.0.0.dist-info}/WHEEL +0 -0
- {warp_lang-0.11.0.dist-info → warp_lang-1.0.0.dist-info}/top_level.txt +0 -0
warp/native/cuda_util.h
CHANGED
@@ -17,8 +17,10 @@
 
 #include <stdio.h>
 
-#define check_cuda(code) (check_cuda_result(code, __FILE__, __LINE__))
-#define check_cu(code) (check_cu_result(code, __FILE__, __LINE__))
+#include <vector>
+
+#define check_cuda(code) (check_cuda_result(code, __FUNCTION__, __FILE__, __LINE__))
+#define check_cu(code) (check_cu_result(code, __FUNCTION__, __FILE__, __LINE__))
 
 
 #if defined(__CUDACC__)
@@ -55,6 +57,7 @@ CUresult cuDeviceGetUuid_f(CUuuid* uuid, CUdevice dev);
 CUresult cuDevicePrimaryCtxRetain_f(CUcontext* ctx, CUdevice dev);
 CUresult cuDevicePrimaryCtxRelease_f(CUdevice dev);
 CUresult cuDeviceCanAccessPeer_f(int* can_access, CUdevice dev, CUdevice peer_dev);
+CUresult cuMemGetInfo_f(size_t* free, size_t* total);
 CUresult cuCtxGetCurrent_f(CUcontext* ctx);
 CUresult cuCtxSetCurrent_f(CUcontext ctx);
 CUresult cuCtxPushCurrent_f(CUcontext ctx);
@@ -64,18 +67,23 @@ CUresult cuCtxGetDevice_f(CUdevice* dev);
 CUresult cuCtxCreate_f(CUcontext* ctx, unsigned int flags, CUdevice dev);
 CUresult cuCtxDestroy_f(CUcontext ctx);
 CUresult cuCtxEnablePeerAccess_f(CUcontext peer_ctx, unsigned int flags);
+CUresult cuCtxDisablePeerAccess_f(CUcontext peer_ctx);
 CUresult cuStreamCreate_f(CUstream* stream, unsigned int flags);
 CUresult cuStreamDestroy_f(CUstream stream);
 CUresult cuStreamSynchronize_f(CUstream stream);
 CUresult cuStreamWaitEvent_f(CUstream stream, CUevent event, unsigned int flags);
+CUresult cuStreamGetCaptureInfo_f(CUstream stream, CUstreamCaptureStatus *captureStatus_out, cuuint64_t *id_out, CUgraph *graph_out, const CUgraphNode **dependencies_out, size_t *numDependencies_out);
+CUresult cuStreamUpdateCaptureDependencies_f(CUstream stream, CUgraphNode *dependencies, size_t numDependencies, unsigned int flags);
 CUresult cuEventCreate_f(CUevent* event, unsigned int flags);
 CUresult cuEventDestroy_f(CUevent event);
 CUresult cuEventRecord_f(CUevent event, CUstream stream);
+CUresult cuEventRecordWithFlags_f(CUevent event, CUstream stream, unsigned int flags);
 CUresult cuModuleUnload_f(CUmodule hmod);
 CUresult cuModuleLoadDataEx_f(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
 CUresult cuModuleGetFunction_f(CUfunction *hfunc, CUmodule hmod, const char *name);
 CUresult cuLaunchKernel_f(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
 CUresult cuMemcpyPeerAsync_f(CUdeviceptr dst_ptr, CUcontext dst_ctx, CUdeviceptr src_ptr, CUcontext src_ctx, size_t n, CUstream stream);
+CUresult cuPointerGetAttribute_f(void* data, CUpointer_attribute attribute, CUdeviceptr ptr);
 CUresult cuGraphicsMapResources_f(unsigned int count, CUgraphicsResource* resources, CUstream stream);
 CUresult cuGraphicsUnmapResources_f(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
 CUresult cuGraphicsResourceGetMappedPointer_f(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource);
@@ -86,13 +94,34 @@ CUresult cuGraphicsUnregisterResource_f(CUgraphicsResource resource);
 bool init_cuda_driver();
 bool is_cuda_driver_initialized();
 
-bool check_cuda_result(cudaError_t code, const char* file, int line);
-inline bool check_cuda_result(uint64_t code, const char* file, int line)
+bool check_cuda_result(cudaError_t code, const char* func, const char* file, int line);
+
+inline bool check_cuda_result(uint64_t code, const char* func, const char* file, int line)
+{
+    return check_cuda_result(static_cast<cudaError_t>(code), func, file, line);
+}
+
+bool check_cu_result(CUresult result, const char* func, const char* file, int line);
+
+inline uint64_t get_capture_id(CUstream stream)
 {
-    return check_cuda_result(static_cast<cudaError_t>(code), file, line);
+    CUstreamCaptureStatus status;
+    uint64_t id = 0;
+    check_cu(cuStreamGetCaptureInfo_f(stream, &status, &id, NULL, NULL, NULL));
+    return id;
 }
 
-bool check_cu_result(CUresult result, const char* file, int line);
+inline CUgraph get_capture_graph(CUstream stream)
+{
+    CUstreamCaptureStatus status;
+    CUgraph graph = NULL;
+    check_cu(cuStreamGetCaptureInfo_f(stream, &status, NULL, &graph, NULL, NULL));
+    return graph;
+}
+
+bool get_capture_dependencies(CUstream stream, std::vector<CUgraphNode>& dependencies_ret);
+
+bool get_graph_leaf_nodes(cudaGraph_t graph, std::vector<cudaGraphNode_t>& leaf_nodes_ret);
 
 
 //
warp/native/cutlass_gemm.cpp
CHANGED
warp/native/cutlass_gemm.cu
CHANGED
@@ -8,6 +8,7 @@
 
 #include "builtin.h"
 #include "temp_buffer.h"
+#include "cuda_util.h"
 
 #include "cutlass/cutlass.h"
 #include "cutlass/gemm/device/gemm_universal.h"
@@ -226,7 +227,7 @@ extern "C" {
 
 WP_API
 bool cutlass_gemm(
-    int compute_capability,
+    void* context, int compute_capability,
     int m, int n, int k,
     const char* datatype_str,
     const void* a, const void* b, const void* c, void* d,
@@ -237,6 +238,8 @@ bool cutlass_gemm(
 
     std::string datatype(datatype_str);
 
+    ContextGuard guard(context);
+
     // Specializations for using Tensor Cores and A/B RowMajor/ColumnMajor designations
    if (compute_capability == 80) {
        if (datatype == F64_STR) {
warp/native/error.cpp
ADDED
@@ -0,0 +1,66 @@
+/** Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
+ * NVIDIA CORPORATION and its licensors retain all intellectual property
+ * and proprietary rights in and to this software, related documentation
+ * and any modifications thereto. Any use, reproduction, disclosure or
+ * distribution of this software and related documentation without an express
+ * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+namespace wp
+{
+static char g_error_buffer[4096] = "";
+static bool g_error_output_enabled = true;
+static FILE* g_error_stream = stderr;
+
+const char* get_error_string()
+{
+    return g_error_buffer;
+}
+
+void set_error_string(const char* fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+    vsnprintf(g_error_buffer, sizeof(g_error_buffer), fmt, args);
+    if (g_error_output_enabled)
+    {
+        vfprintf(g_error_stream, fmt, args);
+        fputc('\n', g_error_stream);
+        fflush(g_error_stream);
+    }
+    va_end(args);
+}
+
+void append_error_string(const char* fmt, ...)
+{
+    size_t offset = strlen(g_error_buffer);
+    if (offset + 2 > sizeof(g_error_buffer))
+        return;
+    g_error_buffer[offset++] = '\n';
+    va_list args;
+    va_start(args, fmt);
+    vsnprintf(g_error_buffer + offset, sizeof(g_error_buffer) - offset, fmt, args);
+    if (g_error_output_enabled)
+    {
+        vfprintf(g_error_stream, fmt, args);
+        fputc('\n', g_error_stream);
+        fflush(g_error_stream);
+    }
+    va_end(args);
+}
+
+void set_error_output_enabled(bool enable)
+{
+    g_error_output_enabled = enable;
+}
+
+bool is_error_output_enabled()
+{
+    return g_error_output_enabled;
+}
+
+} // end of namespace wp
warp/native/error.h
ADDED
@@ -0,0 +1,27 @@
+/** Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
+ * NVIDIA CORPORATION and its licensors retain all intellectual property
+ * and proprietary rights in and to this software, related documentation
+ * and any modifications thereto. Any use, reproduction, disclosure or
+ * distribution of this software and related documentation without an express
+ * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ */
+
+#pragma once
+
+namespace wp
+{
+// functions related to error reporting
+
+// get error string from Python
+const char* get_error_string();
+
+// set error message for Python
+// these functions also print the error message if error output is enabled
+void set_error_string(const char* fmt, ...);
+void append_error_string(const char* fmt, ...);
+
+// allow disabling printing errors, which is handy during tests that expect failure
+void set_error_output_enabled(bool enable);
+bool is_error_output_enabled();
+
+}
warp/native/mesh.cu
CHANGED
@@ -203,8 +203,8 @@ uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::arr
     // bvh_destroy_host(bvh_host);
 
     // create lower upper arrays expected by GPU BVH builder
-    mesh.lowers = (wp::vec3*)
-    mesh.uppers = (wp::vec3*)
+    mesh.lowers = (wp::vec3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
+    mesh.uppers = (wp::vec3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
 
     wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, num_tris, (num_tris, points.data, indices.data, mesh.lowers, mesh.uppers));
 
warp/native/reduce.cu
CHANGED
@@ -110,7 +110,7 @@ template <typename T> void array_sum_device(const T *ptr_a, T *ptr_out, int coun
 
    size_t buff_size = 0;
    check_cuda(cub::DeviceReduce::Sum(nullptr, buff_size, ptr_strided, ptr_out, count, stream));
-    void* temp_buffer = alloc_temp_device(WP_CURRENT_CONTEXT, buff_size);
+    void* temp_buffer = alloc_device(WP_CURRENT_CONTEXT, buff_size);
 
    for (int k = 0; k < type_length; ++k)
    {
@@ -118,7 +118,7 @@ template <typename T> void array_sum_device(const T *ptr_a, T *ptr_out, int coun
        check_cuda(cub::DeviceReduce::Sum(temp_buffer, buff_size, ptr_strided, ptr_out + k, count, stream));
    }
 
-    free_temp_device(WP_CURRENT_CONTEXT, temp_buffer);
+    free_device(WP_CURRENT_CONTEXT, temp_buffer);
 }
 
 template <typename T>
@@ -271,11 +271,11 @@ void array_inner_device(const ElemT *ptr_a, const ElemT *ptr_b, ScalarT *ptr_out
 
    size_t buff_size = 0;
    check_cuda(cub::DeviceReduce::Sum(nullptr, buff_size, inner_iterator, ptr_out, count, stream));
-    void* temp_buffer = alloc_temp_device(WP_CURRENT_CONTEXT, buff_size);
+    void* temp_buffer = alloc_device(WP_CURRENT_CONTEXT, buff_size);
 
    check_cuda(cub::DeviceReduce::Sum(temp_buffer, buff_size, inner_iterator, ptr_out, count, stream));
 
-    free_temp_device(WP_CURRENT_CONTEXT, temp_buffer);
+    free_device(WP_CURRENT_CONTEXT, temp_buffer);
 }
 
 template <typename T>
warp/native/runlength_encode.cu
CHANGED
@@ -21,13 +21,13 @@ void runlength_encode_device(int n,
        nullptr, buff_size, values, run_values, run_lengths, run_count,
        n, stream));
 
-    void* temp_buffer = alloc_temp_device(WP_CURRENT_CONTEXT, buff_size);
+    void* temp_buffer = alloc_device(WP_CURRENT_CONTEXT, buff_size);
 
    check_cuda(cub::DeviceRunLengthEncode::Encode(
        temp_buffer, buff_size, values, run_values, run_lengths, run_count,
        n, stream));
 
-    free_temp_device(WP_CURRENT_CONTEXT, temp_buffer);
+    free_device(WP_CURRENT_CONTEXT, temp_buffer);
 }
 
 void runlength_encode_int_device(
warp/native/scan.cu
CHANGED
@@ -20,7 +20,7 @@ void scan_device(const T* values_in, T* values_out, int n, bool inclusive)
        check_cuda(cub::DeviceScan::ExclusiveSum(NULL, scan_temp_size, values_in, values_out, n));
    }
 
-    void* temp_buffer = alloc_temp_device(WP_CURRENT_CONTEXT, scan_temp_size);
+    void* temp_buffer = alloc_device(WP_CURRENT_CONTEXT, scan_temp_size);
 
    // scan
    if (inclusive) {
@@ -29,7 +29,7 @@ void scan_device(const T* values_in, T* values_out, int n, bool inclusive)
        check_cuda(cub::DeviceScan::ExclusiveSum(temp_buffer, scan_temp_size, values_in, values_out, n, stream));
    }
 
-    free_temp_device(WP_CURRENT_CONTEXT, temp_buffer);
+    free_device(WP_CURRENT_CONTEXT, temp_buffer);
 }
 
 template void scan_device(const int*, int*, int, bool);
warp/native/sparse.cu
CHANGED
@@ -456,7 +456,6 @@ void bsr_transpose_device(int rows_per_block, int cols_per_block, int row_count,
    size_t buff_size = 0;
    check_cuda(cub::DeviceRadixSort::SortPairs(nullptr, buff_size, d_values,
                                               d_keys, nnz, 0, 64, stream));
-    void* temp_buffer = alloc_temp_device(WP_CURRENT_CONTEXT, buff_size);
    ScopedTemporary<> temp(context, buff_size);
    check_cuda(cub::DeviceRadixSort::SortPairs(
        temp.buffer(), buff_size, d_values, d_keys, nnz, 0, 64, stream));
warp/native/temp_buffer.h
CHANGED
@@ -10,13 +10,13 @@ template <typename T = char> struct ScopedTemporary
 {
 
     ScopedTemporary(void *context, size_t size)
-        : m_context(context), m_buffer(static_cast<T*>(alloc_temp_device(m_context, size * sizeof(T))))
+        : m_context(context), m_buffer(static_cast<T*>(alloc_device(m_context, size * sizeof(T))))
     {
     }
 
     ~ScopedTemporary()
     {
-        free_temp_device(m_context, m_buffer);
+        free_device(m_context, m_buffer);
     }
 
     T *buffer() const