warp-lang 1.4.1__py3-none-macosx_10_13_universal2.whl → 1.5.0__py3-none-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +4 -0
- warp/autograd.py +43 -8
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/build.py +21 -2
- warp/build_dll.py +23 -6
- warp/builtins.py +1920 -111
- warp/codegen.py +186 -62
- warp/config.py +2 -2
- warp/context.py +322 -73
- warp/examples/assets/pixel.jpg +0 -0
- warp/examples/benchmarks/benchmark_cloth_paddle.py +86 -0
- warp/examples/benchmarks/benchmark_gemm.py +121 -0
- warp/examples/benchmarks/benchmark_interop_paddle.py +158 -0
- warp/examples/benchmarks/benchmark_tile.py +179 -0
- warp/examples/core/example_dem.py +2 -1
- warp/examples/core/example_mesh_intersect.py +3 -3
- warp/examples/fem/example_adaptive_grid.py +37 -10
- warp/examples/fem/example_apic_fluid.py +3 -2
- warp/examples/fem/example_convection_diffusion_dg.py +4 -5
- warp/examples/fem/example_deformed_geometry.py +1 -1
- warp/examples/fem/example_diffusion_3d.py +47 -4
- warp/examples/fem/example_distortion_energy.py +220 -0
- warp/examples/fem/example_magnetostatics.py +127 -85
- warp/examples/fem/example_nonconforming_contact.py +5 -5
- warp/examples/fem/example_stokes.py +3 -1
- warp/examples/fem/example_streamlines.py +12 -19
- warp/examples/fem/utils.py +38 -15
- warp/examples/optim/example_walker.py +2 -2
- warp/examples/sim/example_cloth.py +2 -25
- warp/examples/sim/example_jacobian_ik.py +6 -2
- warp/examples/sim/example_quadruped.py +2 -1
- warp/examples/tile/example_tile_convolution.py +58 -0
- warp/examples/tile/example_tile_fft.py +47 -0
- warp/examples/tile/example_tile_filtering.py +105 -0
- warp/examples/tile/example_tile_matmul.py +79 -0
- warp/examples/tile/example_tile_mlp.py +375 -0
- warp/fem/__init__.py +8 -0
- warp/fem/cache.py +16 -12
- warp/fem/dirichlet.py +1 -1
- warp/fem/domain.py +44 -1
- warp/fem/field/__init__.py +1 -2
- warp/fem/field/field.py +31 -19
- warp/fem/field/nodal_field.py +101 -49
- warp/fem/field/virtual.py +794 -0
- warp/fem/geometry/__init__.py +2 -2
- warp/fem/geometry/deformed_geometry.py +3 -105
- warp/fem/geometry/element.py +13 -0
- warp/fem/geometry/geometry.py +165 -5
- warp/fem/geometry/grid_2d.py +3 -6
- warp/fem/geometry/grid_3d.py +31 -28
- warp/fem/geometry/hexmesh.py +3 -46
- warp/fem/geometry/nanogrid.py +3 -2
- warp/fem/geometry/{quadmesh_2d.py → quadmesh.py} +280 -159
- warp/fem/geometry/tetmesh.py +2 -43
- warp/fem/geometry/{trimesh_2d.py → trimesh.py} +354 -186
- warp/fem/integrate.py +683 -261
- warp/fem/linalg.py +404 -0
- warp/fem/operator.py +101 -18
- warp/fem/polynomial.py +5 -5
- warp/fem/quadrature/quadrature.py +45 -21
- warp/fem/space/__init__.py +45 -11
- warp/fem/space/basis_function_space.py +451 -0
- warp/fem/space/basis_space.py +58 -11
- warp/fem/space/function_space.py +146 -5
- warp/fem/space/grid_2d_function_space.py +80 -66
- warp/fem/space/grid_3d_function_space.py +113 -68
- warp/fem/space/hexmesh_function_space.py +96 -108
- warp/fem/space/nanogrid_function_space.py +62 -110
- warp/fem/space/quadmesh_function_space.py +208 -0
- warp/fem/space/shape/__init__.py +45 -7
- warp/fem/space/shape/cube_shape_function.py +328 -54
- warp/fem/space/shape/shape_function.py +10 -1
- warp/fem/space/shape/square_shape_function.py +328 -60
- warp/fem/space/shape/tet_shape_function.py +269 -19
- warp/fem/space/shape/triangle_shape_function.py +238 -19
- warp/fem/space/tetmesh_function_space.py +69 -37
- warp/fem/space/topology.py +38 -0
- warp/fem/space/trimesh_function_space.py +179 -0
- warp/fem/utils.py +6 -331
- warp/jax_experimental.py +3 -1
- warp/native/array.h +55 -40
- warp/native/builtin.h +124 -43
- warp/native/bvh.h +4 -0
- warp/native/coloring.cpp +600 -0
- warp/native/cuda_util.cpp +14 -0
- warp/native/cuda_util.h +2 -1
- warp/native/fabric.h +8 -0
- warp/native/hashgrid.h +4 -0
- warp/native/marching.cu +8 -0
- warp/native/mat.h +14 -3
- warp/native/mathdx.cpp +59 -0
- warp/native/mesh.h +4 -0
- warp/native/range.h +13 -1
- warp/native/reduce.cpp +9 -1
- warp/native/reduce.cu +7 -0
- warp/native/runlength_encode.cpp +9 -1
- warp/native/runlength_encode.cu +7 -1
- warp/native/scan.cpp +8 -0
- warp/native/scan.cu +8 -0
- warp/native/scan.h +8 -1
- warp/native/sparse.cpp +8 -0
- warp/native/sparse.cu +8 -0
- warp/native/temp_buffer.h +7 -0
- warp/native/tile.h +1857 -0
- warp/native/tile_gemm.h +341 -0
- warp/native/tile_reduce.h +210 -0
- warp/native/volume_builder.cu +8 -0
- warp/native/volume_builder.h +8 -0
- warp/native/warp.cpp +10 -2
- warp/native/warp.cu +369 -15
- warp/native/warp.h +12 -2
- warp/optim/adam.py +39 -4
- warp/paddle.py +29 -12
- warp/render/render_opengl.py +137 -65
- warp/sim/graph_coloring.py +292 -0
- warp/sim/integrator_euler.py +4 -2
- warp/sim/integrator_featherstone.py +115 -44
- warp/sim/integrator_vbd.py +6 -0
- warp/sim/model.py +90 -17
- warp/stubs.py +651 -85
- warp/tape.py +12 -7
- warp/tests/assets/pixel.npy +0 -0
- warp/tests/aux_test_instancing_gc.py +18 -0
- warp/tests/test_array.py +207 -48
- warp/tests/test_closest_point_edge_edge.py +8 -8
- warp/tests/test_codegen.py +120 -1
- warp/tests/test_codegen_instancing.py +30 -0
- warp/tests/test_collision.py +110 -0
- warp/tests/test_coloring.py +241 -0
- warp/tests/test_context.py +34 -0
- warp/tests/test_examples.py +18 -4
- warp/tests/test_fabricarray.py +33 -0
- warp/tests/test_fem.py +453 -113
- warp/tests/test_func.py +48 -1
- warp/tests/test_generics.py +52 -0
- warp/tests/test_iter.py +68 -0
- warp/tests/test_mat_scalar_ops.py +1 -1
- warp/tests/test_mesh_query_point.py +5 -4
- warp/tests/test_module_hashing.py +23 -0
- warp/tests/test_paddle.py +27 -87
- warp/tests/test_print.py +191 -1
- warp/tests/test_spatial.py +1 -1
- warp/tests/test_tile.py +700 -0
- warp/tests/test_tile_mathdx.py +144 -0
- warp/tests/test_tile_mlp.py +383 -0
- warp/tests/test_tile_reduce.py +374 -0
- warp/tests/test_tile_shared_memory.py +190 -0
- warp/tests/test_vbd.py +12 -20
- warp/tests/test_volume.py +43 -0
- warp/tests/unittest_suites.py +23 -2
- warp/tests/unittest_utils.py +4 -0
- warp/types.py +339 -73
- warp/utils.py +22 -1
- {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/METADATA +33 -7
- {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/RECORD +159 -132
- {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/WHEEL +1 -1
- warp/fem/field/test.py +0 -180
- warp/fem/field/trial.py +0 -183
- warp/fem/space/collocated_function_space.py +0 -102
- warp/fem/space/quadmesh_2d_function_space.py +0 -261
- warp/fem/space/trimesh_2d_function_space.py +0 -153
- {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/top_level.txt +0 -0
warp/native/coloring.cpp
ADDED
|
@@ -0,0 +1,600 @@
|
|
|
1
|
+
/** Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// The Apache 2 License
|
|
10
|
+
|
|
11
|
+
// Copyright 2023 Anka He Chen
|
|
12
|
+
//
|
|
13
|
+
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
|
14
|
+
// use this file except in compliance with the License. You may obtain a copy of the License at
|
|
15
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
16
|
+
// Unless required by applicable law or agreed to in writing, software distributed under the
|
|
17
|
+
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
|
18
|
+
// either express or implied. See the License for the specific language governing permissions
|
|
19
|
+
// and limitations under the License.
|
|
20
|
+
//
|
|
21
|
+
// Source: https://github.com/AnkaChan/Gaia/blob/main/Simulator/Modules/GraphColoring/ColoringAlgorithms.cpp
|
|
22
|
+
// https://github.com/AnkaChan/Gaia/blob/main/Simulator/Modules/GraphColoring/ColoringAlgorithms.h
|
|
23
|
+
// https://github.com/AnkaChan/Gaia/blob/main/Simulator/Modules/GraphColoring/Graph.h
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
#include "warp.h"
|
|
28
|
+
|
|
29
|
+
#include <iostream>
|
|
30
|
+
#include <vector>
|
|
31
|
+
#include <array>
|
|
32
|
+
#include <queue>
|
|
33
|
+
#include <queue>
|
|
34
|
+
#include <unordered_set>
|
|
35
|
+
#include <random>
|
|
36
|
+
#include <algorithm>
|
|
37
|
+
#include <numeric>
|
|
38
|
+
|
|
39
|
+
#define SHRINK_GRAPH_PER_PERCENTAGE (5)
|
|
40
|
+
#define NODE_WEIGHTS_PREALLOC (64)
|
|
41
|
+
#define WEIGHT_BUCKET_PREALLOC (512)
|
|
42
|
+
|
|
43
|
+
namespace wp
|
|
44
|
+
{
|
|
45
|
+
|
|
46
|
+
struct Graph
|
|
47
|
+
{
|
|
48
|
+
Graph(int num_nodes_in, const wp::array_t<int>& edges)
|
|
49
|
+
: num_nodes(num_nodes_in)
|
|
50
|
+
{
|
|
51
|
+
node_offsets.resize(num_nodes + 1, 0);
|
|
52
|
+
node_colors.resize(num_nodes, -1);
|
|
53
|
+
|
|
54
|
+
std::vector<int> node_degrees(num_nodes, 0);
|
|
55
|
+
|
|
56
|
+
// count degrees
|
|
57
|
+
for (size_t edge_idx = 0; edge_idx < edges.shape[0]; edge_idx++)
|
|
58
|
+
{
|
|
59
|
+
int e0 = *address(edges, edge_idx, 0);
|
|
60
|
+
int e1 = *address(edges, edge_idx, 1);
|
|
61
|
+
node_degrees[e0] += 1;
|
|
62
|
+
node_degrees[e1] += 1;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
int offset = 0;
|
|
66
|
+
for (size_t node = 0; node < num_nodes; node++)
|
|
67
|
+
{
|
|
68
|
+
offset += node_degrees[node];
|
|
69
|
+
node_offsets[node + 1] = offset;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
// fill adjacency list
|
|
73
|
+
std::vector<int> node_adjacency_fill_count(num_nodes, 0);
|
|
74
|
+
graph_flatten.resize(offset, -1);
|
|
75
|
+
for (size_t edge_idx = 0; edge_idx < edges.shape[0]; edge_idx++)
|
|
76
|
+
{
|
|
77
|
+
int e0 = *address(edges, edge_idx, 0);
|
|
78
|
+
int e1 = *address(edges, edge_idx, 1);
|
|
79
|
+
|
|
80
|
+
int fill_count_e0 = node_adjacency_fill_count[e0];
|
|
81
|
+
graph_flatten[node_offsets[e0] + fill_count_e0] = e1;
|
|
82
|
+
|
|
83
|
+
int fill_count_e1 = node_adjacency_fill_count[e1];
|
|
84
|
+
graph_flatten[node_offsets[e1] + fill_count_e1] = e0;
|
|
85
|
+
|
|
86
|
+
node_adjacency_fill_count[e0] = fill_count_e0 + 1;
|
|
87
|
+
node_adjacency_fill_count[e1] = fill_count_e1 + 1;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
int get_node_neighbor(int node, int neighbor_index) const {
|
|
93
|
+
return graph_flatten[node_offsets[node] + neighbor_index];
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
int get_node_degree(int node) const {
|
|
97
|
+
return node_offsets[node + 1] - node_offsets[node];
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
int num_nodes;
|
|
102
|
+
std::vector<int> graph_flatten;
|
|
103
|
+
std::vector<int> node_offsets;
|
|
104
|
+
std::vector<int> node_colors;
|
|
105
|
+
};
|
|
106
|
+
|
|
107
|
+
// Buckets node indices by color: after the call, color_groups[c] lists (in
// increasing node order) the nodes whose entry in node_colors equals c.
//
// color_groups is resized to num_colors entries (existing entries are kept, as
// before). Nodes whose color lies outside [0, num_colors) -- for example -1 for
// an uncolored node -- are skipped; they previously indexed color_groups out of
// bounds.
void convert_to_color_groups(const int num_colors, const std::vector<int>& node_colors, std::vector<std::vector<int>>& color_groups)
{
    color_groups.resize(num_colors > 0 ? num_colors : 0);

    const int num_nodes = int(node_colors.size());
    for (int node_idx = 0; node_idx < num_nodes; node_idx++)
    {
        const int color = node_colors[node_idx];
        if (color < 0 || color >= num_colors)
        {
            continue;
        }
        color_groups[color].push_back(node_idx);
    }
}
|
|
116
|
+
|
|
117
|
+
float find_largest_smallest_groups(const std::vector<std::vector<int>>& color_groups, int& biggest_group, int& smallest_group)
|
|
118
|
+
{
|
|
119
|
+
if (color_groups.size() == 0)
|
|
120
|
+
{
|
|
121
|
+
biggest_group = -1;
|
|
122
|
+
smallest_group = -1;
|
|
123
|
+
|
|
124
|
+
return 1;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
size_t max_size = color_groups[0].size();
|
|
128
|
+
biggest_group = 0;
|
|
129
|
+
size_t min_size = color_groups[0].size();
|
|
130
|
+
smallest_group = 0;
|
|
131
|
+
|
|
132
|
+
for (size_t color = 0; color < color_groups.size(); color++)
|
|
133
|
+
{
|
|
134
|
+
if (max_size < color_groups[color].size()) {
|
|
135
|
+
biggest_group = color;
|
|
136
|
+
max_size = color_groups[color].size();
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
if (min_size > color_groups[color].size())
|
|
140
|
+
{
|
|
141
|
+
smallest_group = color;
|
|
142
|
+
min_size = color_groups[color].size();
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
return float(color_groups[biggest_group].size()) / float(color_groups[smallest_group].size());
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
bool color_changeable(const Graph& graph, int node, int target_color){
|
|
150
|
+
// loop through node and see if it has target color
|
|
151
|
+
for (size_t i = 0; i < graph.get_node_degree(node); i++)
|
|
152
|
+
{
|
|
153
|
+
int nei_node_idx = graph.get_node_neighbor(node, i);
|
|
154
|
+
if (graph.node_colors[nei_node_idx] == target_color)
|
|
155
|
+
{
|
|
156
|
+
return false;
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
return true;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
int find_changeable_node_in_category(
|
|
163
|
+
const Graph& graph,
|
|
164
|
+
const std::vector<std::vector<int>>& color_groups,
|
|
165
|
+
int source_color,
|
|
166
|
+
int target_color
|
|
167
|
+
)
|
|
168
|
+
{
|
|
169
|
+
auto& source_group = color_groups[source_color];
|
|
170
|
+
for (size_t node_idx = 0; node_idx < source_group.size(); node_idx++)
|
|
171
|
+
{
|
|
172
|
+
if (color_changeable(graph, source_group[node_idx], target_color)) {
|
|
173
|
+
return node_idx;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
return -1;
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// Moves the node stored at position `node_idx_in_group` of group `color` into
// group `target_color`, updating both node_colors and color_groups.
// The node is removed by swapping it with the last entry of its group, so the
// order inside the source group is not preserved.
void change_color(int color, int node_idx_in_group, int target_color, std::vector<int>& node_colors, std::vector<std::vector<int>>& color_groups)
{
    std::vector<int>& source_group = color_groups[color];
    const int moved_node = source_group[node_idx_in_group];
    node_colors[moved_node] = target_color;

    if (color_groups.empty())
    {
        return;
    }

    // O(1) erase: overwrite the slot with the last entry, then drop the tail
    std::swap(source_group[node_idx_in_group], source_group.back());
    source_group.pop_back();

    color_groups[target_color].push_back(moved_node);
}
|
|
193
|
+
|
|
194
|
+
float balance_color_groups(float target_max_min_ratio,
|
|
195
|
+
Graph& graph,
|
|
196
|
+
std::vector<std::vector<int>>& color_groups)
|
|
197
|
+
{
|
|
198
|
+
float max_min_ratio = -1.f;
|
|
199
|
+
|
|
200
|
+
do
|
|
201
|
+
{
|
|
202
|
+
int biggest_group = -1, smallest_group = -1;
|
|
203
|
+
|
|
204
|
+
max_min_ratio = find_largest_smallest_groups(color_groups, biggest_group, smallest_group);
|
|
205
|
+
|
|
206
|
+
// graph is not optimizable anymore or target ratio reached
|
|
207
|
+
if (color_groups[biggest_group].size() - color_groups[smallest_group].size() <= 2
|
|
208
|
+
|| max_min_ratio < target_max_min_ratio)
|
|
209
|
+
{
|
|
210
|
+
return max_min_ratio;
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
// find a available vertex from the biggest category to move to the smallest category
|
|
214
|
+
int changeable_color_group_idx = biggest_group;
|
|
215
|
+
int changeable_node_idx = find_changeable_node_in_category(graph, color_groups, biggest_group, smallest_group);
|
|
216
|
+
if (changeable_node_idx == -1)
|
|
217
|
+
{
|
|
218
|
+
for (size_t color = 0; color < color_groups.size(); color++)
|
|
219
|
+
{
|
|
220
|
+
if (color == biggest_group || color == smallest_group)
|
|
221
|
+
{
|
|
222
|
+
continue;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
changeable_node_idx = find_changeable_node_in_category(graph, color_groups, color, smallest_group);
|
|
226
|
+
|
|
227
|
+
if (changeable_node_idx != -1)
|
|
228
|
+
{
|
|
229
|
+
changeable_color_group_idx = color;
|
|
230
|
+
|
|
231
|
+
break;
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
if (changeable_node_idx == -1)
|
|
238
|
+
{
|
|
239
|
+
// fprintf(stderr, "The graph is not optimizable anymore, terminated with a max/min ratio: %f without reaching the target ratio: %f\n", max_min_ratio, target_max_min_ratio);
|
|
240
|
+
return max_min_ratio;
|
|
241
|
+
}
|
|
242
|
+
// change the color of changeable_color_idx in group changeable_color_group_idx to
|
|
243
|
+
change_color(changeable_color_group_idx, changeable_node_idx, smallest_group, graph.node_colors, color_groups);
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
} while (max_min_ratio > target_max_min_ratio);
|
|
247
|
+
|
|
248
|
+
return max_min_ratio;
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
int graph_coloring_ordered_greedy(const std::vector<int>& order, Graph& graph)
|
|
252
|
+
{
|
|
253
|
+
// greedy coloring
|
|
254
|
+
int max_color = -1;
|
|
255
|
+
int num_colored = 0;
|
|
256
|
+
std::vector<bool> color_used;
|
|
257
|
+
color_used.reserve(128);
|
|
258
|
+
|
|
259
|
+
for (size_t i = 0; i < order.size(); i++)
|
|
260
|
+
{
|
|
261
|
+
int node = order[i];
|
|
262
|
+
|
|
263
|
+
// first one
|
|
264
|
+
if (max_color == -1)
|
|
265
|
+
{
|
|
266
|
+
++max_color;
|
|
267
|
+
graph.node_colors[node] = max_color;
|
|
268
|
+
}
|
|
269
|
+
else {
|
|
270
|
+
color_used.resize(max_color + 1);
|
|
271
|
+
|
|
272
|
+
for (int color_counter = 0; color_counter < color_used.size(); color_counter++)
|
|
273
|
+
{
|
|
274
|
+
color_used[color_counter] = false;
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
// see its neighbor's color
|
|
278
|
+
for (int nei_counter = 0; nei_counter < graph.get_node_degree(node); nei_counter++)
|
|
279
|
+
{
|
|
280
|
+
int nei_node_idx = graph.get_node_neighbor(node, nei_counter);
|
|
281
|
+
if (graph.node_colors[nei_node_idx] >= 0)
|
|
282
|
+
{
|
|
283
|
+
color_used[graph.node_colors[nei_node_idx]] = true;
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
// find the minimal usable color
|
|
288
|
+
int min_usable_color = -1;
|
|
289
|
+
for (int color_counter = 0; color_counter < color_used.size(); color_counter++)
|
|
290
|
+
{
|
|
291
|
+
if (!color_used[color_counter]) {
|
|
292
|
+
min_usable_color = color_counter;
|
|
293
|
+
break;
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
if (min_usable_color == -1)
|
|
297
|
+
{
|
|
298
|
+
++max_color;
|
|
299
|
+
graph.node_colors[node] = max_color;
|
|
300
|
+
}
|
|
301
|
+
else
|
|
302
|
+
{
|
|
303
|
+
graph.node_colors[node] = min_usable_color;
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
num_colored++;
|
|
308
|
+
}
|
|
309
|
+
return (max_color + 1);
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
class NodeWeightBuckets
|
|
313
|
+
{
|
|
314
|
+
public:
|
|
315
|
+
NodeWeightBuckets(int num_nodes)
|
|
316
|
+
: node_weights(num_nodes, 0), node_indices_in_bucket(num_nodes, -1)
|
|
317
|
+
{
|
|
318
|
+
weight_buckets.resize(NODE_WEIGHTS_PREALLOC);
|
|
319
|
+
for (size_t i = 1; i < weight_buckets.size(); i++)
|
|
320
|
+
{
|
|
321
|
+
weight_buckets[i].reserve(WEIGHT_BUCKET_PREALLOC);
|
|
322
|
+
}
|
|
323
|
+
max_weight = 0;
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
int get_node_weight(int node_idx)
|
|
327
|
+
{
|
|
328
|
+
return node_weights[node_idx];
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
void add_node(int weight, int node_idx)
|
|
332
|
+
{
|
|
333
|
+
if (weight >= weight_buckets.size())
|
|
334
|
+
{
|
|
335
|
+
weight_buckets.resize(weight + 1);
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
node_indices_in_bucket[node_idx] = weight_buckets[weight].size();
|
|
339
|
+
node_weights[node_idx] = weight;
|
|
340
|
+
weight_buckets[weight].push_back(node_idx);
|
|
341
|
+
|
|
342
|
+
if (max_weight < weight)
|
|
343
|
+
{
|
|
344
|
+
max_weight = weight;
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
int pop_node_with_max_weight() {
|
|
349
|
+
int node_with_max_weight = weight_buckets[max_weight].front();
|
|
350
|
+
node_indices_in_bucket[node_with_max_weight] = -1;
|
|
351
|
+
|
|
352
|
+
// we pop the first element so it has a breadth-first like behavior, which is better than depth-first
|
|
353
|
+
if (weight_buckets[max_weight].size() > 1)
|
|
354
|
+
{
|
|
355
|
+
node_indices_in_bucket[weight_buckets[max_weight].back()] = 0;
|
|
356
|
+
weight_buckets[max_weight][0] = weight_buckets[max_weight].back();
|
|
357
|
+
}
|
|
358
|
+
weight_buckets[max_weight].pop_back();
|
|
359
|
+
// mark node deleted
|
|
360
|
+
node_weights[node_with_max_weight] = -1;
|
|
361
|
+
|
|
362
|
+
if (weight_buckets[max_weight].size() == 0)
|
|
363
|
+
// we need to update max_weight because weight_buckets[max_weight] became empty
|
|
364
|
+
{
|
|
365
|
+
int new_max_weight = 0;
|
|
366
|
+
for (size_t bucket_idx = max_weight - 1; bucket_idx >= 0; bucket_idx--)
|
|
367
|
+
{
|
|
368
|
+
if (weight_buckets[bucket_idx].size())
|
|
369
|
+
{
|
|
370
|
+
new_max_weight = bucket_idx;
|
|
371
|
+
break;
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
max_weight = new_max_weight;
|
|
376
|
+
}
|
|
377
|
+
// mark deleted
|
|
378
|
+
return node_with_max_weight;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
void increase_node_weight(int node_idx)
|
|
382
|
+
{
|
|
383
|
+
int weight = node_weights[node_idx];
|
|
384
|
+
assert(weight < weight_buckets.size());
|
|
385
|
+
int node_idx_in_bucket = node_indices_in_bucket[node_idx];
|
|
386
|
+
assert(node_idx_in_bucket < weight_buckets[weight].size());
|
|
387
|
+
|
|
388
|
+
// swap index with the last element
|
|
389
|
+
node_indices_in_bucket[weight_buckets[weight].back()] = node_idx_in_bucket;
|
|
390
|
+
// O(1) erase
|
|
391
|
+
weight_buckets[weight][node_idx_in_bucket] = weight_buckets[weight].back();
|
|
392
|
+
weight_buckets[weight].pop_back();
|
|
393
|
+
|
|
394
|
+
add_node(weight + 1, node_idx);
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
bool empty()
|
|
398
|
+
{
|
|
399
|
+
return max_weight <= 0 && weight_buckets[0].size() == 0;
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
private:
|
|
404
|
+
int max_weight;
|
|
405
|
+
std::vector<std::vector<int>> weight_buckets;
|
|
406
|
+
std::vector<int> node_indices_in_bucket;
|
|
407
|
+
std::vector<int> node_weights;
|
|
408
|
+
};
|
|
409
|
+
|
|
410
|
+
// Pereira, F. M. Q., & Palsberg, J. (2005, November). Register allocation via coloring of chordal graphs. In Asian Symposium on Programming Languages and Systems (pp. 315-329). Berlin, Heidelberg: Springer Berlin Heidelberg.
|
|
411
|
+
// Colors the graph greedily along a maximum-cardinality-search ordering: the
// next node is always one with the most already-ordered neighbors, tracked with
// NodeWeightBuckets. Returns the number of colors used.
int graph_coloring_mcs_vector(Graph& graph)
{
    std::vector<int> ordering;

    // nothing to seed the search with
    if (graph.num_nodes <= 0)
    {
        return graph_coloring_ordered_greedy(ordering, graph);
    }

    ordering.reserve(graph.num_nodes);

    // Initially the weight of each node is 0
    NodeWeightBuckets weight_buckets(graph.num_nodes);
    // add the first node
    weight_buckets.add_node(0, 0);

    // Lowest index that may still be unvisited. Visited nodes keep weight -1
    // forever, so the search for a new seed never has to restart from node 0;
    // this keeps the total seeding cost linear even with many components.
    int seed_cursor = 0;

    for (int node_idx = 0; node_idx < graph.num_nodes; node_idx++)
    {
        // buckets ran dry: the current connected component is exhausted, so
        // seed the search with the first node that has not been visited yet
        if (weight_buckets.empty())
        {
            while (seed_cursor < graph.num_nodes && weight_buckets.get_node_weight(seed_cursor) < 0)
            {
                seed_cursor++;
            }
            assert(seed_cursor < graph.num_nodes);
            assert(weight_buckets.get_node_weight(seed_cursor) == 0);
            weight_buckets.add_node(0, seed_cursor);
        }

        int max_node = weight_buckets.pop_node_with_max_weight();

        // Add highest weight node to the queue and increment all of its neighbors weights by 1
        ordering.push_back(max_node);

        const int neighbor_count = graph.get_node_degree(max_node);
        for (int j = 0; j < neighbor_count; j++) {
            int neighbor_node = graph.get_node_neighbor(max_node, j);
            int old_weight = weight_buckets.get_node_weight(neighbor_node);

            if (old_weight == 0)
            // 0-weighted node is not in buckets by default
            {
                weight_buckets.add_node(old_weight + 1, neighbor_node);
            }
            else if (old_weight > 0) {
                weight_buckets.increase_node_weight(neighbor_node);
            }
            // skip neighbor nodes with negative weight because they are visited
        }
    }

    return graph_coloring_ordered_greedy(ordering, graph);
}
|
|
462
|
+
|
|
463
|
+
// Returns the index of the first node with the smallest degree, ignoring nodes
// whose degree is -1 and degrees above num_nodes; -1 when no node qualifies.
int next_node(const int num_nodes, const std::vector<int>& degrees)
{
    int best_node = -1;
    int best_degree = num_nodes + 1;

    int node = 0;
    for (const int degree : degrees)
    {
        // -1 marks an entry to skip; strict '<' keeps the earliest minimum
        if (degree != -1 && degree < best_degree)
        {
            best_degree = degree;
            best_node = node;
        }
        ++node;
    }
    return best_node;
}
|
|
480
|
+
|
|
481
|
+
void reduce_degree(int node_idx, Graph& graph, std::vector<int>& degrees)
|
|
482
|
+
{
|
|
483
|
+
degrees[node_idx] = -1;
|
|
484
|
+
for (size_t nei_node_counter = 0; nei_node_counter < graph.get_node_degree(node_idx); nei_node_counter++)
|
|
485
|
+
{
|
|
486
|
+
int nei_node_idx = graph.get_node_neighbor(node_idx, nei_node_counter);
|
|
487
|
+
|
|
488
|
+
if (degrees[nei_node_idx] != -1)
|
|
489
|
+
{
|
|
490
|
+
degrees[nei_node_idx]--;
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
// Fratarcangeli, Marco, and Fabio Pellacini. "Scalable partitioning for parallel position based dynamics." Computer Graphics Forum. Vol. 34. No. 2. 2015.
|
|
497
|
+
// Greedy coloring that visits the nodes from highest to lowest degree.
// Returns the number of colors used.
int graph_coloring_degree_ordered_greedy(Graph& graph)
{
    // initialize the degree
    std::vector<int> degrees(graph.num_nodes, 0);
    for (int node_idx = 0; node_idx < graph.num_nodes; node_idx++) {
        degrees[node_idx] = graph.get_node_degree(node_idx);
    }

    // order them in a descending order
    std::vector<int> ordering(graph.num_nodes);
    std::iota(std::begin(ordering), std::end(ordering), 0);
    // `degrees` is captured by reference (the capture list was garbled in the
    // rendered source and did not compile)
    std::sort(std::begin(ordering), std::end(ordering),
        [&degrees](const auto& lhs, const auto& rhs)
        {
            return degrees[lhs] > degrees[rhs];
        }
    );

    return graph_coloring_ordered_greedy(ordering, graph);
}
|
|
517
|
+
|
|
518
|
+
// Greedy coloring that visits the nodes in index order 0, 1, 2, ...
// Returns the number of colors used.
int graph_coloring_naive_greedy(Graph& graph)
{
    std::vector<int> natural_order(graph.num_nodes);

    int next_index = 0;
    for (int& entry : natural_order)
    {
        entry = next_index++;
    }

    return graph_coloring_ordered_greedy(natural_order, graph);
}
|
|
524
|
+
}
|
|
525
|
+
using namespace wp;
|
|
526
|
+
|
|
527
|
+
extern "C"
|
|
528
|
+
{
|
|
529
|
+
int graph_coloring(int num_nodes, wp::array_t<int> edges, int algorithm, wp::array_t<int> node_colors)
|
|
530
|
+
{
|
|
531
|
+
if (node_colors.ndim != 1 || node_colors.shape[0] != num_nodes)
|
|
532
|
+
{
|
|
533
|
+
fprintf(stderr, "The node_colors array must have the preallocated shape of (num_nodes,)!\n");
|
|
534
|
+
return -1;
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
if (edges.ndim != 2)
|
|
538
|
+
{
|
|
539
|
+
fprintf(stderr, "The edges array must have 2 dimensions!\n");
|
|
540
|
+
return -1;
|
|
541
|
+
}
|
|
542
|
+
|
|
543
|
+
if (num_nodes == 0)
|
|
544
|
+
{
|
|
545
|
+
fprintf(stderr, "Empty graph!\n");
|
|
546
|
+
return -1;
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
// convert to a format that coloring algorithm can recognize
|
|
550
|
+
|
|
551
|
+
Graph graph(num_nodes, edges);
|
|
552
|
+
|
|
553
|
+
int num_colors = -1;
|
|
554
|
+
switch (algorithm)
|
|
555
|
+
{
|
|
556
|
+
case 0:
|
|
557
|
+
// mcs algorithm
|
|
558
|
+
num_colors = graph_coloring_mcs_vector(graph);
|
|
559
|
+
break;
|
|
560
|
+
case 1:
|
|
561
|
+
// greedy
|
|
562
|
+
num_colors = graph_coloring_degree_ordered_greedy(graph);
|
|
563
|
+
break;
|
|
564
|
+
//case 2:
|
|
565
|
+
// // mcs algorithm
|
|
566
|
+
// num_colors = graph_coloring_mcs_set(graph);
|
|
567
|
+
// break;
|
|
568
|
+
//case 3:
|
|
569
|
+
// // naive greedy
|
|
570
|
+
// num_colors = graph_coloring_naive_greedy(graph);
|
|
571
|
+
// break;
|
|
572
|
+
default:
|
|
573
|
+
fprintf(stderr, "Unrecognized coloring algorithm number: %d!\n", algorithm);
|
|
574
|
+
return -1;
|
|
575
|
+
break;
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
// copy the color info back
|
|
579
|
+
memcpy(node_colors.data, graph.node_colors.data(), num_nodes * sizeof(int));
|
|
580
|
+
|
|
581
|
+
return num_colors;
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
float balance_coloring(int num_nodes, wp::array_t<int> edges, int num_colors,
|
|
585
|
+
float target_max_min_ratio, wp::array_t<int> node_colors)
|
|
586
|
+
{
|
|
587
|
+
Graph graph(num_nodes, edges);
|
|
588
|
+
// copy the color info to graph
|
|
589
|
+
memcpy(graph.node_colors.data(), node_colors.data, num_nodes * sizeof(int));
|
|
590
|
+
if (num_colors > 1) {
|
|
591
|
+
std::vector<std::vector<int>> color_groups;
|
|
592
|
+
convert_to_color_groups(num_colors, graph.node_colors, color_groups);
|
|
593
|
+
return balance_color_groups(target_max_min_ratio, graph, color_groups);
|
|
594
|
+
}
|
|
595
|
+
else
|
|
596
|
+
{
|
|
597
|
+
return 1.f;
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
}
|
warp/native/cuda_util.cpp
CHANGED
|
@@ -100,6 +100,8 @@ static PFN_cuGraphicsUnmapResources_v3000 pfn_cuGraphicsUnmapResources;
|
|
|
100
100
|
static PFN_cuGraphicsResourceGetMappedPointer_v3020 pfn_cuGraphicsResourceGetMappedPointer;
|
|
101
101
|
static PFN_cuGraphicsGLRegisterBuffer_v3000 pfn_cuGraphicsGLRegisterBuffer;
|
|
102
102
|
static PFN_cuGraphicsUnregisterResource_v3000 pfn_cuGraphicsUnregisterResource;
|
|
103
|
+
static PFN_cuModuleGetGlobal_v3020 pfn_cuModuleGetGlobal;
|
|
104
|
+
static PFN_cuFuncSetAttribute_v9000 pfn_cuFuncSetAttribute;
|
|
103
105
|
|
|
104
106
|
static bool cuda_driver_initialized = false;
|
|
105
107
|
|
|
@@ -231,6 +233,8 @@ bool init_cuda_driver()
|
|
|
231
233
|
get_driver_entry_point("cuGraphicsResourceGetMappedPointer", &(void*&)pfn_cuGraphicsResourceGetMappedPointer);
|
|
232
234
|
get_driver_entry_point("cuGraphicsGLRegisterBuffer", &(void*&)pfn_cuGraphicsGLRegisterBuffer);
|
|
233
235
|
get_driver_entry_point("cuGraphicsUnregisterResource", &(void*&)pfn_cuGraphicsUnregisterResource);
|
|
236
|
+
get_driver_entry_point("cuModuleGetGlobal", &(void*&)pfn_cuModuleGetGlobal);
|
|
237
|
+
get_driver_entry_point("cuFuncSetAttribute", &(void*&)pfn_cuFuncSetAttribute);
|
|
234
238
|
|
|
235
239
|
if (pfn_cuInit)
|
|
236
240
|
cuda_driver_initialized = check_cu(pfn_cuInit(0));
|
|
@@ -568,4 +572,14 @@ CUresult cuGraphicsUnregisterResource_f(CUgraphicsResource resource)
|
|
|
568
572
|
return pfn_cuGraphicsUnregisterResource ? pfn_cuGraphicsUnregisterResource(resource) : DRIVER_ENTRY_POINT_ERROR;
|
|
569
573
|
}
|
|
570
574
|
|
|
575
|
+
// Forwards to the cuModuleGetGlobal entry point held in pfn_cuModuleGetGlobal;
// yields DRIVER_ENTRY_POINT_ERROR when that pointer is null.
CUresult cuModuleGetGlobal_f(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name )
{
    if (!pfn_cuModuleGetGlobal)
    {
        return DRIVER_ENTRY_POINT_ERROR;
    }
    return pfn_cuModuleGetGlobal(dptr, bytes, hmod, name);
}
|
|
579
|
+
|
|
580
|
+
// Forwards to the cuFuncSetAttribute entry point held in pfn_cuFuncSetAttribute;
// yields DRIVER_ENTRY_POINT_ERROR when that pointer is null.
CUresult cuFuncSetAttribute_f(CUfunction hfunc, CUfunction_attribute attrib, int value)
{
    if (!pfn_cuFuncSetAttribute)
    {
        return DRIVER_ENTRY_POINT_ERROR;
    }
    return pfn_cuFuncSetAttribute(hfunc, attrib, value);
}
|
|
584
|
+
|
|
571
585
|
#endif // WP_ENABLE_CUDA
|
warp/native/cuda_util.h
CHANGED
|
@@ -99,7 +99,8 @@ CUresult cuGraphicsUnmapResources_f(unsigned int count, CUgraphicsResource* reso
|
|
|
99
99
|
CUresult cuGraphicsResourceGetMappedPointer_f(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource);
|
|
100
100
|
CUresult cuGraphicsGLRegisterBuffer_f(CUgraphicsResource *pCudaResource, unsigned int buffer, unsigned int flags);
|
|
101
101
|
CUresult cuGraphicsUnregisterResource_f(CUgraphicsResource resource);
|
|
102
|
-
|
|
102
|
+
CUresult cuModuleGetGlobal_f(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name );
|
|
103
|
+
CUresult cuFuncSetAttribute_f(CUfunction hfunc, CUfunction_attribute attrib, int value);
|
|
103
104
|
|
|
104
105
|
bool init_cuda_driver();
|
|
105
106
|
bool is_cuda_driver_initialized();
|
warp/native/fabric.h
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
/** Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
8
|
+
|
|
1
9
|
#pragma once
|
|
2
10
|
|
|
3
11
|
#include "builtin.h"
|
warp/native/hashgrid.h
CHANGED
|
@@ -209,6 +209,10 @@ CUDA_CALLABLE inline hash_grid_query_t iter_reverse(const hash_grid_query_t& que
|
|
|
209
209
|
return query;
|
|
210
210
|
}
|
|
211
211
|
|
|
212
|
+
// Counterpart of iter_reverse() with adj_ arguments; the body is empty, so the
// call has no effect on any of its arguments.
// NOTE(review): presumably the adjoint hook expected by generated code, with
// nothing to propagate for a query object -- confirm against the code generator.
CUDA_CALLABLE inline void adj_iter_reverse(const hash_grid_query_t& query, hash_grid_query_t& adj_query, hash_grid_query_t& adj_ret)
{
}
|
|
215
|
+
|
|
212
216
|
|
|
213
217
|
|
|
214
218
|
CUDA_CALLABLE inline int hash_grid_point_id(uint64_t id, int& index)
|
warp/native/marching.cu
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
8
|
+
|
|
1
9
|
#include "warp.h"
|
|
2
10
|
#include "cuda_util.h"
|
|
3
11
|
#include "scan.h"
|