warp-lang 1.4.2__py3-none-manylinux2014_aarch64.whl → 1.5.1__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (166) hide show
  1. warp/__init__.py +4 -0
  2. warp/autograd.py +43 -8
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +21 -2
  6. warp/build_dll.py +23 -6
  7. warp/builtins.py +1819 -7
  8. warp/codegen.py +197 -61
  9. warp/config.py +2 -2
  10. warp/context.py +379 -107
  11. warp/examples/assets/pixel.jpg +0 -0
  12. warp/examples/benchmarks/benchmark_cloth_paddle.py +86 -0
  13. warp/examples/benchmarks/benchmark_gemm.py +121 -0
  14. warp/examples/benchmarks/benchmark_interop_paddle.py +158 -0
  15. warp/examples/benchmarks/benchmark_tile.py +179 -0
  16. warp/examples/fem/example_adaptive_grid.py +37 -10
  17. warp/examples/fem/example_apic_fluid.py +3 -2
  18. warp/examples/fem/example_convection_diffusion_dg.py +4 -5
  19. warp/examples/fem/example_deformed_geometry.py +1 -1
  20. warp/examples/fem/example_diffusion_3d.py +47 -4
  21. warp/examples/fem/example_distortion_energy.py +220 -0
  22. warp/examples/fem/example_magnetostatics.py +127 -85
  23. warp/examples/fem/example_nonconforming_contact.py +5 -5
  24. warp/examples/fem/example_stokes.py +3 -1
  25. warp/examples/fem/example_streamlines.py +12 -19
  26. warp/examples/fem/utils.py +38 -15
  27. warp/examples/sim/example_cloth.py +4 -25
  28. warp/examples/sim/example_quadruped.py +2 -1
  29. warp/examples/tile/example_tile_convolution.py +58 -0
  30. warp/examples/tile/example_tile_fft.py +47 -0
  31. warp/examples/tile/example_tile_filtering.py +105 -0
  32. warp/examples/tile/example_tile_matmul.py +79 -0
  33. warp/examples/tile/example_tile_mlp.py +375 -0
  34. warp/fem/__init__.py +8 -0
  35. warp/fem/cache.py +16 -12
  36. warp/fem/dirichlet.py +1 -1
  37. warp/fem/domain.py +44 -1
  38. warp/fem/field/__init__.py +1 -2
  39. warp/fem/field/field.py +31 -19
  40. warp/fem/field/nodal_field.py +101 -49
  41. warp/fem/field/virtual.py +794 -0
  42. warp/fem/geometry/__init__.py +2 -2
  43. warp/fem/geometry/deformed_geometry.py +3 -105
  44. warp/fem/geometry/element.py +13 -0
  45. warp/fem/geometry/geometry.py +165 -7
  46. warp/fem/geometry/grid_2d.py +3 -6
  47. warp/fem/geometry/grid_3d.py +31 -28
  48. warp/fem/geometry/hexmesh.py +3 -46
  49. warp/fem/geometry/nanogrid.py +3 -2
  50. warp/fem/geometry/{quadmesh_2d.py → quadmesh.py} +280 -159
  51. warp/fem/geometry/tetmesh.py +2 -43
  52. warp/fem/geometry/{trimesh_2d.py → trimesh.py} +354 -186
  53. warp/fem/integrate.py +683 -261
  54. warp/fem/linalg.py +404 -0
  55. warp/fem/operator.py +101 -18
  56. warp/fem/polynomial.py +5 -5
  57. warp/fem/quadrature/quadrature.py +45 -21
  58. warp/fem/space/__init__.py +45 -11
  59. warp/fem/space/basis_function_space.py +451 -0
  60. warp/fem/space/basis_space.py +58 -11
  61. warp/fem/space/function_space.py +146 -5
  62. warp/fem/space/grid_2d_function_space.py +80 -66
  63. warp/fem/space/grid_3d_function_space.py +113 -68
  64. warp/fem/space/hexmesh_function_space.py +96 -108
  65. warp/fem/space/nanogrid_function_space.py +62 -110
  66. warp/fem/space/quadmesh_function_space.py +208 -0
  67. warp/fem/space/shape/__init__.py +45 -7
  68. warp/fem/space/shape/cube_shape_function.py +328 -54
  69. warp/fem/space/shape/shape_function.py +10 -1
  70. warp/fem/space/shape/square_shape_function.py +328 -60
  71. warp/fem/space/shape/tet_shape_function.py +269 -19
  72. warp/fem/space/shape/triangle_shape_function.py +238 -19
  73. warp/fem/space/tetmesh_function_space.py +69 -37
  74. warp/fem/space/topology.py +38 -0
  75. warp/fem/space/trimesh_function_space.py +179 -0
  76. warp/fem/utils.py +6 -331
  77. warp/jax_experimental.py +3 -1
  78. warp/native/array.h +15 -0
  79. warp/native/builtin.h +66 -26
  80. warp/native/bvh.h +4 -0
  81. warp/native/coloring.cpp +604 -0
  82. warp/native/cuda_util.cpp +68 -51
  83. warp/native/cuda_util.h +2 -1
  84. warp/native/fabric.h +8 -0
  85. warp/native/hashgrid.h +4 -0
  86. warp/native/marching.cu +8 -0
  87. warp/native/mat.h +14 -3
  88. warp/native/mathdx.cpp +59 -0
  89. warp/native/mesh.h +4 -0
  90. warp/native/range.h +13 -1
  91. warp/native/reduce.cpp +9 -1
  92. warp/native/reduce.cu +7 -0
  93. warp/native/runlength_encode.cpp +9 -1
  94. warp/native/runlength_encode.cu +7 -1
  95. warp/native/scan.cpp +8 -0
  96. warp/native/scan.cu +8 -0
  97. warp/native/scan.h +8 -1
  98. warp/native/sparse.cpp +8 -0
  99. warp/native/sparse.cu +8 -0
  100. warp/native/temp_buffer.h +7 -0
  101. warp/native/tile.h +1854 -0
  102. warp/native/tile_gemm.h +341 -0
  103. warp/native/tile_reduce.h +210 -0
  104. warp/native/volume_builder.cu +8 -0
  105. warp/native/volume_builder.h +8 -0
  106. warp/native/warp.cpp +10 -2
  107. warp/native/warp.cu +369 -15
  108. warp/native/warp.h +12 -2
  109. warp/optim/adam.py +39 -4
  110. warp/paddle.py +29 -12
  111. warp/render/render_opengl.py +140 -67
  112. warp/sim/graph_coloring.py +292 -0
  113. warp/sim/import_urdf.py +8 -8
  114. warp/sim/integrator_euler.py +4 -2
  115. warp/sim/integrator_featherstone.py +115 -44
  116. warp/sim/integrator_vbd.py +6 -0
  117. warp/sim/model.py +109 -32
  118. warp/sparse.py +1 -1
  119. warp/stubs.py +569 -4
  120. warp/tape.py +12 -7
  121. warp/tests/assets/pixel.npy +0 -0
  122. warp/tests/aux_test_instancing_gc.py +18 -0
  123. warp/tests/test_array.py +39 -0
  124. warp/tests/test_codegen.py +81 -1
  125. warp/tests/test_codegen_instancing.py +30 -0
  126. warp/tests/test_collision.py +110 -0
  127. warp/tests/test_coloring.py +251 -0
  128. warp/tests/test_context.py +34 -0
  129. warp/tests/test_examples.py +21 -5
  130. warp/tests/test_fem.py +453 -113
  131. warp/tests/test_func.py +34 -4
  132. warp/tests/test_generics.py +52 -0
  133. warp/tests/test_iter.py +68 -0
  134. warp/tests/test_lerp.py +13 -87
  135. warp/tests/test_mat_scalar_ops.py +1 -1
  136. warp/tests/test_matmul.py +6 -9
  137. warp/tests/test_matmul_lite.py +6 -11
  138. warp/tests/test_mesh_query_point.py +1 -1
  139. warp/tests/test_module_hashing.py +23 -0
  140. warp/tests/test_overwrite.py +45 -0
  141. warp/tests/test_paddle.py +27 -87
  142. warp/tests/test_print.py +56 -1
  143. warp/tests/test_smoothstep.py +17 -83
  144. warp/tests/test_spatial.py +1 -1
  145. warp/tests/test_static.py +3 -3
  146. warp/tests/test_tile.py +744 -0
  147. warp/tests/test_tile_mathdx.py +144 -0
  148. warp/tests/test_tile_mlp.py +383 -0
  149. warp/tests/test_tile_reduce.py +374 -0
  150. warp/tests/test_tile_shared_memory.py +190 -0
  151. warp/tests/test_vbd.py +12 -20
  152. warp/tests/test_volume.py +43 -0
  153. warp/tests/unittest_suites.py +19 -2
  154. warp/tests/unittest_utils.py +4 -2
  155. warp/types.py +340 -74
  156. warp/utils.py +23 -3
  157. {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/METADATA +32 -7
  158. {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/RECORD +161 -134
  159. {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/WHEEL +1 -1
  160. warp/fem/field/test.py +0 -180
  161. warp/fem/field/trial.py +0 -183
  162. warp/fem/space/collocated_function_space.py +0 -102
  163. warp/fem/space/quadmesh_2d_function_space.py +0 -261
  164. warp/fem/space/trimesh_2d_function_space.py +0 -153
  165. {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/LICENSE.md +0 -0
  166. {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/top_level.txt +0 -0
warp/native/bvh.h CHANGED
@@ -404,6 +404,10 @@ CUDA_CALLABLE inline bvh_query_t iter_reverse(const bvh_query_t& query)
404
404
  return query;
405
405
  }
406
406
 
407
+ CUDA_CALLABLE inline void adj_iter_reverse(const bvh_query_t& query, bvh_query_t& adj_query, bvh_query_t& adj_ret)
408
+ {
409
+ }
410
+
407
411
 
408
412
  // stub
409
413
  CUDA_CALLABLE inline void adj_bvh_query_next(bvh_query_t& query, int& index, bvh_query_t&, int&, bool&)
@@ -0,0 +1,604 @@
1
+ /** Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
2
+ * NVIDIA CORPORATION and its licensors retain all intellectual property
3
+ * and proprietary rights in and to this software, related documentation
4
+ * and any modifications thereto. Any use, reproduction, disclosure or
5
+ * distribution of this software and related documentation without an express
6
+ * license agreement from NVIDIA CORPORATION is strictly prohibited.
7
+ */
8
+
9
+ // The Apache 2 License
10
+
11
+ // Copyright 2023 Anka He Chen
12
+ //
13
+ // Licensed under the Apache License, Version 2.0 (the "License"); you may not
14
+ // use this file except in compliance with the License. You may obtain a copy of the License at
15
+ // http://www.apache.org/licenses/LICENSE-2.0
16
+ // Unless required by applicable law or agreed to in writing, software distributed under the
17
+ // License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
18
+ // either express or implied. See the License for the specific language governing permissions
19
+ // and limitations under the License.
20
+ //
21
+ // Source: https://github.com/AnkaChan/Gaia/blob/main/Simulator/Modules/GraphColoring/ColoringAlgorithms.cpp
22
+ // https://github.com/AnkaChan/Gaia/blob/main/Simulator/Modules/GraphColoring/ColoringAlgorithms.h
23
+ // https://github.com/AnkaChan/Gaia/blob/main/Simulator/Modules/GraphColoring/Graph.h
24
+
25
+
26
+
27
+ #include "warp.h"
28
+
29
+ #include <iostream>
30
+ #include <vector>
31
+ #include <array>
32
+ #include <queue>
33
+ #include <queue>
34
+ #include <unordered_set>
35
+ #include <random>
36
+ #include <algorithm>
37
+ #include <numeric>
38
+
39
+ #define SHRINK_GRAPH_PER_PERCENTAGE (5)
40
+ #define NODE_WEIGHTS_PREALLOC (64)
41
+ #define WEIGHT_BUCKET_PREALLOC (512)
42
+
43
+ namespace wp
44
+ {
45
+
46
+ struct Graph
47
+ {
48
+ Graph(int num_nodes_in, const wp::array_t<int>& edges)
49
+ : num_nodes(num_nodes_in)
50
+ {
51
+ node_offsets.resize(num_nodes + 1, 0);
52
+ node_colors.resize(num_nodes, -1);
53
+
54
+ std::vector<int> node_degrees(num_nodes, 0);
55
+
56
+ // count degrees
57
+ for (size_t edge_idx = 0; edge_idx < edges.shape[0]; edge_idx++)
58
+ {
59
+ int e0 = *address(edges, edge_idx, 0);
60
+ int e1 = *address(edges, edge_idx, 1);
61
+ node_degrees[e0] += 1;
62
+ node_degrees[e1] += 1;
63
+ }
64
+
65
+ int offset = 0;
66
+ for (size_t node = 0; node < num_nodes; node++)
67
+ {
68
+ offset += node_degrees[node];
69
+ node_offsets[node + 1] = offset;
70
+ }
71
+
72
+ // fill adjacency list
73
+ std::vector<int> node_adjacency_fill_count(num_nodes, 0);
74
+ graph_flatten.resize(offset, -1);
75
+ for (size_t edge_idx = 0; edge_idx < edges.shape[0]; edge_idx++)
76
+ {
77
+ int e0 = *address(edges, edge_idx, 0);
78
+ int e1 = *address(edges, edge_idx, 1);
79
+
80
+ int fill_count_e0 = node_adjacency_fill_count[e0];
81
+ graph_flatten[node_offsets[e0] + fill_count_e0] = e1;
82
+
83
+ int fill_count_e1 = node_adjacency_fill_count[e1];
84
+ graph_flatten[node_offsets[e1] + fill_count_e1] = e0;
85
+
86
+ node_adjacency_fill_count[e0] = fill_count_e0 + 1;
87
+ node_adjacency_fill_count[e1] = fill_count_e1 + 1;
88
+ }
89
+
90
+ }
91
+
92
+ int get_node_neighbor(int node, int neighbor_index) const {
93
+ return graph_flatten[node_offsets[node] + neighbor_index];
94
+ }
95
+
96
+ int get_node_degree(int node) const {
97
+ return node_offsets[node + 1] - node_offsets[node];
98
+ }
99
+
100
+
101
+ int num_nodes;
102
+ std::vector<int> graph_flatten;
103
+ std::vector<int> node_offsets;
104
+ std::vector<int> node_colors;
105
+ };
106
+
107
// Buckets node indices by color.
//
// color_groups is resized to num_colors (a negative count is treated as 0) and
// every node whose color c satisfies 0 <= c < num_colors is appended to
// color_groups[c], in increasing node order. Entries already present in
// color_groups before the call are kept, as resize() does not clear them.
//
// Nodes with a color outside that range (for instance -1) are left out of all
// groups instead of indexing color_groups out of bounds.
void convert_to_color_groups(const int num_colors, const std::vector<int>& node_colors, std::vector<std::vector<int>>& color_groups)
{
    const int group_count = num_colors > 0 ? num_colors : 0;
    color_groups.resize(group_count);

    const int node_count = static_cast<int>(node_colors.size());
    for (int node_idx = 0; node_idx < node_count; node_idx++)
    {
        const int color = node_colors[node_idx];
        if (color < 0 || color >= group_count)
        {
            continue;
        }
        color_groups[color].push_back(node_idx);
    }
}
116
+
117
// Locates the most and least populated color groups.
//
// On return biggest_group / smallest_group hold the index of the first group
// with the maximum / minimum size (both -1 when color_groups is empty).
// The return value is max size divided by min size as floats, or 1 for an
// empty color_groups.
float find_largest_smallest_groups(const std::vector<std::vector<int>>& color_groups, int& biggest_group, int& smallest_group)
{
    if (color_groups.empty())
    {
        biggest_group = -1;
        smallest_group = -1;
        return 1;
    }

    const auto fewer_nodes = [](const std::vector<int>& lhs, const std::vector<int>& rhs)
    {
        return lhs.size() < rhs.size();
    };

    // both algorithms return the first of several equal candidates
    const auto largest = std::max_element(color_groups.begin(), color_groups.end(), fewer_nodes);
    const auto smallest = std::min_element(color_groups.begin(), color_groups.end(), fewer_nodes);

    biggest_group = static_cast<int>(largest - color_groups.begin());
    smallest_group = static_cast<int>(smallest - color_groups.begin());

    return float(largest->size()) / float(smallest->size());
}
148
+
149
+ bool color_changeable(const Graph& graph, int node, int target_color){
150
+ // loop through node and see if it has target color
151
+ for (size_t i = 0; i < graph.get_node_degree(node); i++)
152
+ {
153
+ int nei_node_idx = graph.get_node_neighbor(node, i);
154
+ if (graph.node_colors[nei_node_idx] == target_color)
155
+ {
156
+ return false;
157
+ }
158
+ }
159
+ return true;
160
+ }
161
+
162
+ int find_changeable_node_in_category(
163
+ const Graph& graph,
164
+ const std::vector<std::vector<int>>& color_groups,
165
+ int source_color,
166
+ int target_color
167
+ )
168
+ {
169
+ auto& source_group = color_groups[source_color];
170
+ for (size_t node_idx = 0; node_idx < source_group.size(); node_idx++)
171
+ {
172
+ if (color_changeable(graph, source_group[node_idx], target_color)) {
173
+ return node_idx;
174
+ }
175
+ }
176
+ return -1;
177
+ }
178
+
179
// Moves one node from color group `color` to color group `target_color`.
//
// node_idx_in_group is the node's position inside color_groups[color]. The
// node's entry in node_colors is set to target_color, the node is removed from
// its old group in O(1) (the last member takes its place) and appended to the
// target group.
//
// The call is a no-op when any index is out of range. The original size check
// ran only after color_groups had already been indexed, so it could not
// protect that access.
void change_color(int color, int node_idx_in_group, int target_color, std::vector<int>& node_colors, std::vector<std::vector<int>>& color_groups)
{
    const int group_count = static_cast<int>(color_groups.size());
    if (color < 0 || color >= group_count || target_color < 0 || target_color >= group_count)
    {
        return;
    }

    std::vector<int>& source_group = color_groups[color];
    if (node_idx_in_group < 0 || node_idx_in_group >= static_cast<int>(source_group.size()))
    {
        return;
    }

    const int node_idx = source_group[node_idx_in_group];
    if (node_idx < 0 || node_idx >= static_cast<int>(node_colors.size()))
    {
        return;
    }

    node_colors[node_idx] = target_color;

    // O(1) erase: overwrite the vacated slot with the last member, then drop the tail
    source_group[node_idx_in_group] = source_group.back();
    source_group.pop_back();

    color_groups[target_color].push_back(node_idx);
}
193
+
194
+ float balance_color_groups(float target_max_min_ratio,
195
+ Graph& graph,
196
+ std::vector<std::vector<int>>& color_groups)
197
+ {
198
+ float max_min_ratio = -1.f;
199
+
200
+ do
201
+ {
202
+ int biggest_group = -1, smallest_group = -1;
203
+
204
+ max_min_ratio = find_largest_smallest_groups(color_groups, biggest_group, smallest_group);
205
+
206
+ // graph is not optimizable anymore or target ratio reached
207
+ if (color_groups[biggest_group].size() - color_groups[smallest_group].size() <= 2
208
+ || max_min_ratio < target_max_min_ratio)
209
+ {
210
+ return max_min_ratio;
211
+ }
212
+
213
+ // find a available vertex from the biggest category to move to the smallest category
214
+ int changeable_color_group_idx = biggest_group;
215
+ int changeable_node_idx = find_changeable_node_in_category(graph, color_groups, biggest_group, smallest_group);
216
+ if (changeable_node_idx == -1)
217
+ {
218
+ for (size_t color = 0; color < color_groups.size(); color++)
219
+ {
220
+ if (color == biggest_group || color == smallest_group)
221
+ {
222
+ continue;
223
+ }
224
+
225
+ changeable_node_idx = find_changeable_node_in_category(graph, color_groups, color, smallest_group);
226
+
227
+ if (changeable_node_idx != -1)
228
+ {
229
+ changeable_color_group_idx = color;
230
+
231
+ break;
232
+ }
233
+ }
234
+ }
235
+
236
+
237
+ if (changeable_node_idx == -1)
238
+ {
239
+ // fprintf(stderr, "The graph is not optimizable anymore, terminated with a max/min ratio: %f without reaching the target ratio: %f\n", max_min_ratio, target_max_min_ratio);
240
+ return max_min_ratio;
241
+ }
242
+ // change the color of changeable_color_idx in group changeable_color_group_idx to
243
+ change_color(changeable_color_group_idx, changeable_node_idx, smallest_group, graph.node_colors, color_groups);
244
+
245
+
246
+ } while (max_min_ratio > target_max_min_ratio);
247
+
248
+ return max_min_ratio;
249
+ }
250
+
251
+ int graph_coloring_ordered_greedy(const std::vector<int>& order, Graph& graph)
252
+ {
253
+ // greedy coloring
254
+ int max_color = -1;
255
+ int num_colored = 0;
256
+ std::vector<bool> color_used;
257
+ color_used.reserve(128);
258
+
259
+ for (size_t i = 0; i < order.size(); i++)
260
+ {
261
+ int node = order[i];
262
+
263
+ // first one
264
+ if (max_color == -1)
265
+ {
266
+ ++max_color;
267
+ graph.node_colors[node] = max_color;
268
+ }
269
+ else {
270
+ color_used.resize(max_color + 1);
271
+
272
+ for (int color_counter = 0; color_counter < color_used.size(); color_counter++)
273
+ {
274
+ color_used[color_counter] = false;
275
+ }
276
+
277
+ // see its neighbor's color
278
+ for (int nei_counter = 0; nei_counter < graph.get_node_degree(node); nei_counter++)
279
+ {
280
+ int nei_node_idx = graph.get_node_neighbor(node, nei_counter);
281
+ if (graph.node_colors[nei_node_idx] >= 0)
282
+ {
283
+ color_used[graph.node_colors[nei_node_idx]] = true;
284
+ }
285
+ }
286
+
287
+ // find the minimal usable color
288
+ int min_usable_color = -1;
289
+ for (int color_counter = 0; color_counter < color_used.size(); color_counter++)
290
+ {
291
+ if (!color_used[color_counter]) {
292
+ min_usable_color = color_counter;
293
+ break;
294
+ }
295
+ }
296
+ if (min_usable_color == -1)
297
+ {
298
+ ++max_color;
299
+ graph.node_colors[node] = max_color;
300
+ }
301
+ else
302
+ {
303
+ graph.node_colors[node] = min_usable_color;
304
+ }
305
+ }
306
+
307
+ num_colored++;
308
+ }
309
+ return (max_color + 1);
310
+ }
311
+
312
+ class NodeWeightBuckets
313
+ {
314
+ public:
315
+ NodeWeightBuckets(int num_nodes)
316
+ : node_weights(num_nodes, 0), node_indices_in_bucket(num_nodes, -1)
317
+ {
318
+ weight_buckets.resize(NODE_WEIGHTS_PREALLOC);
319
+ for (size_t i = 1; i < weight_buckets.size(); i++)
320
+ {
321
+ weight_buckets[i].reserve(WEIGHT_BUCKET_PREALLOC);
322
+ }
323
+ max_weight = 0;
324
+ }
325
+
326
+ int get_node_weight(int node_idx)
327
+ {
328
+ return node_weights[node_idx];
329
+ }
330
+
331
+ void add_node(int weight, int node_idx)
332
+ {
333
+ if (weight >= weight_buckets.size())
334
+ {
335
+ weight_buckets.resize(weight + 1);
336
+ }
337
+
338
+ node_indices_in_bucket[node_idx] = weight_buckets[weight].size();
339
+ node_weights[node_idx] = weight;
340
+ weight_buckets[weight].push_back(node_idx);
341
+
342
+ if (max_weight < weight)
343
+ {
344
+ max_weight = weight;
345
+ }
346
+ }
347
+
348
+ int pop_node_with_max_weight() {
349
+ int node_with_max_weight = weight_buckets[max_weight].front();
350
+ node_indices_in_bucket[node_with_max_weight] = -1;
351
+
352
+ // we pop the first element so it has a breadth-first like behavior, which is better than depth-first
353
+ if (weight_buckets[max_weight].size() > 1)
354
+ {
355
+ node_indices_in_bucket[weight_buckets[max_weight].back()] = 0;
356
+ weight_buckets[max_weight][0] = weight_buckets[max_weight].back();
357
+ }
358
+ weight_buckets[max_weight].pop_back();
359
+ // mark node deleted
360
+ node_weights[node_with_max_weight] = -1;
361
+
362
+ if (weight_buckets[max_weight].size() == 0)
363
+ // we need to update max_weight because weight_buckets[max_weight] became empty
364
+ {
365
+ int new_max_weight = 0;
366
+ for (size_t bucket_idx = max_weight - 1; bucket_idx >= 0; bucket_idx--)
367
+ {
368
+ if (weight_buckets[bucket_idx].size())
369
+ {
370
+ new_max_weight = bucket_idx;
371
+ break;
372
+ }
373
+ }
374
+
375
+ max_weight = new_max_weight;
376
+ }
377
+ // mark deleted
378
+ return node_with_max_weight;
379
+ }
380
+
381
+ void increase_node_weight(int node_idx)
382
+ {
383
+ int weight = node_weights[node_idx];
384
+ assert(weight < weight_buckets.size());
385
+ int node_idx_in_bucket = node_indices_in_bucket[node_idx];
386
+ assert(node_idx_in_bucket < weight_buckets[weight].size());
387
+
388
+ // swap index with the last element
389
+ node_indices_in_bucket[weight_buckets[weight].back()] = node_idx_in_bucket;
390
+ // O(1) erase
391
+ weight_buckets[weight][node_idx_in_bucket] = weight_buckets[weight].back();
392
+ weight_buckets[weight].pop_back();
393
+
394
+ add_node(weight + 1, node_idx);
395
+ }
396
+
397
+ bool empty()
398
+ {
399
+ return max_weight <= 0 && weight_buckets[0].size() == 0;
400
+ }
401
+
402
+
403
+ private:
404
+ int max_weight;
405
+ std::vector<std::vector<int>> weight_buckets;
406
+ std::vector<int> node_indices_in_bucket;
407
+ std::vector<int> node_weights;
408
+ };
409
+
410
+ // Pereira, F. M. Q., & Palsberg, J. (2005, November). Register allocation via coloring of chordal graphs. In Asian Symposium on Programming Languages and Systems (pp. 315-329). Berlin, Heidelberg: Springer Berlin Heidelberg.
411
+ int graph_coloring_mcs_vector(Graph& graph)
412
+ {
413
+ // Initially set the weight of each node to 0
414
+ std::vector<int> ordering;
415
+ ordering.reserve(graph.num_nodes);
416
+
417
+ NodeWeightBuckets weight_buckets(graph.num_nodes);
418
+ // add the first node
419
+ weight_buckets.add_node(0, 0);
420
+
421
+ for (int node_idx = 0; node_idx < graph.num_nodes; node_idx++)
422
+ {
423
+ // this might look like it's O(N^2) but this only happens once per connected components
424
+ if (weight_buckets.empty())
425
+ {
426
+ int non_negative_node = -1;
427
+ for (size_t i = 0; i < graph.num_nodes; i++)
428
+ {
429
+ if (weight_buckets.get_node_weight(i) >= 0) {
430
+ non_negative_node = i;
431
+ break;
432
+ }
433
+ }
434
+ assert(weight_buckets.get_node_weight(non_negative_node) == 0);
435
+ weight_buckets.add_node(0, non_negative_node);
436
+ }
437
+
438
+ int max_node = weight_buckets.pop_node_with_max_weight();
439
+
440
+ // Add highest weight node to the queue and increment all of its neighbors weights by 1
441
+ ordering.push_back(max_node);
442
+
443
+ for (unsigned j = 0; j < graph.get_node_degree(max_node); j++) {
444
+ int neighbor_node = graph.get_node_neighbor(max_node, j);
445
+ int old_weight = weight_buckets.get_node_weight(neighbor_node);
446
+
447
+ if (old_weight == 0)
448
+ // 0-weighted node is not in buckets by default
449
+ {
450
+ weight_buckets.add_node(old_weight + 1, neighbor_node);
451
+
452
+ }
453
+ else if (old_weight > 0) {
454
+ weight_buckets.increase_node_weight(neighbor_node);
455
+ }
456
+ // skip neighbor nodes with negative weight because they are visited
457
+ }
458
+ }
459
+
460
+ return graph_coloring_ordered_greedy(ordering, graph);
461
+ }
462
+
463
// Returns the index of the first entry of `degrees` holding the smallest value
// among entries that are not -1 and are below num_nodes + 1; returns -1 when no
// entry qualifies.
int next_node(const int num_nodes, const std::vector<int>& degrees)
{
    int best_node = -1;
    int best_degree = num_nodes + 1;

    int node = 0;
    for (const int degree : degrees)
    {
        // -1 marks an entry to ignore; a strict comparison keeps the first of equal minima
        if (degree != -1 && degree < best_degree)
        {
            best_degree = degree;
            best_node = node;
        }
        ++node;
    }
    return best_node;
}
480
+
481
+ void reduce_degree(int node_idx, Graph& graph, std::vector<int>& degrees)
482
+ {
483
+ degrees[node_idx] = -1;
484
+ for (size_t nei_node_counter = 0; nei_node_counter < graph.get_node_degree(node_idx); nei_node_counter++)
485
+ {
486
+ int nei_node_idx = graph.get_node_neighbor(node_idx, nei_node_counter);
487
+
488
+ if (degrees[nei_node_idx] != -1)
489
+ {
490
+ degrees[nei_node_idx]--;
491
+ }
492
+ }
493
+ }
494
+
495
+
496
+ // Fratarcangeli, Marco, and Fabio Pellacini. "Scalable partitioning for parallel position based dynamics." Computer Graphics Forum. Vol. 34. No. 2. 2015.
497
+ int graph_coloring_degree_ordered_greedy(Graph& graph)
498
+ {
499
+ // initialize the degree
500
+ std::vector<int> degrees(graph.num_nodes, 0);
501
+ for (int node_idx = 0; node_idx < graph.num_nodes; node_idx++) {
502
+ degrees[node_idx] = graph.get_node_degree(node_idx);
503
+ }
504
+
505
+ // order them in a descending order
506
+ std::vector<int> ordering(graph.num_nodes);
507
+ std::iota(std::begin(ordering), std::end(ordering), 0);
508
+ std::sort(std::begin(ordering), std::end(ordering),
509
+ [&degrees](const auto& lhs, const auto& rhs)
510
+ {
511
+ return degrees[lhs] > degrees[rhs];
512
+ }
513
+ );
514
+
515
+ return graph_coloring_ordered_greedy(ordering, graph);
516
+ }
517
+
518
+ int graph_coloring_naive_greedy(Graph& graph)
519
+ {
520
+ std::vector<int> ordering(graph.num_nodes);
521
+ std::iota(std::begin(ordering), std::end(ordering), 0);
522
+ return graph_coloring_ordered_greedy(ordering, graph);
523
+ }
524
+ }
525
+ using namespace wp;
526
+
527
+ extern "C"
528
+ {
529
+ int graph_coloring(int num_nodes, wp::array_t<int> edges, int algorithm, wp::array_t<int> node_colors)
530
+ {
531
+ if (node_colors.ndim != 1 || node_colors.shape[0] != num_nodes)
532
+ {
533
+ fprintf(stderr, "The node_colors array must have the preallocated shape of (num_nodes,)!\n");
534
+ return -1;
535
+ }
536
+
537
+ if (edges.ndim != 2)
538
+ {
539
+ fprintf(stderr, "The edges array must have 2 dimensions!\n");
540
+ return -1;
541
+ }
542
+
543
+ if (num_nodes == 0)
544
+ {
545
+ fprintf(stderr, "Empty graph!\n");
546
+ return -1;
547
+ }
548
+
549
+ // convert to a format that coloring algorithm can recognize
550
+
551
+ Graph graph(num_nodes, edges);
552
+
553
+ int num_colors = -1;
554
+ switch (algorithm)
555
+ {
556
+ case 0:
557
+ // mcs algorithm
558
+ num_colors = graph_coloring_mcs_vector(graph);
559
+ break;
560
+ case 1:
561
+ // greedy
562
+ num_colors = graph_coloring_degree_ordered_greedy(graph);
563
+ break;
564
+ //case 2:
565
+ // // mcs algorithm
566
+ // num_colors = graph_coloring_mcs_set(graph);
567
+ // break;
568
+ //case 3:
569
+ // // naive greedy
570
+ // num_colors = graph_coloring_naive_greedy(graph);
571
+ // break;
572
+ default:
573
+ fprintf(stderr, "Unrecognized coloring algorithm number: %d!\n", algorithm);
574
+ return -1;
575
+ break;
576
+ }
577
+
578
+ // copy the color info back
579
+ memcpy(node_colors.data, graph.node_colors.data(), num_nodes * sizeof(int));
580
+
581
+ return num_colors;
582
+ }
583
+
584
+ float balance_coloring(int num_nodes, wp::array_t<int> edges, int num_colors,
585
+ float target_max_min_ratio, wp::array_t<int> node_colors)
586
+ {
587
+ Graph graph(num_nodes, edges);
588
+ // copy the color info to graph
589
+ memcpy(graph.node_colors.data(), node_colors.data, num_nodes * sizeof(int));
590
+ if (num_colors > 1) {
591
+ std::vector<std::vector<int>> color_groups;
592
+ convert_to_color_groups(num_colors, graph.node_colors, color_groups);
593
+
594
+ float max_min_ratio = balance_color_groups(target_max_min_ratio, graph, color_groups);
595
+ memcpy(node_colors.data, graph.node_colors.data(), num_nodes * sizeof(int));
596
+
597
+ return max_min_ratio;
598
+ }
599
+ else
600
+ {
601
+ return 1.f;
602
+ }
603
+ }
604
+ }