PyPI - warp-lang - Versions diffs - 1.4.2__py3-none-manylinux2014_aarch64.whl → 1.5.1__py3-none-manylinux2014_aarch64.whl - Mend

warp-lang 1.4.2__py3-none-manylinux2014_aarch64.whl → 1.5.1__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (166) hide show

warp/__init__.py +4 -0
warp/autograd.py +43 -8
warp/bin/warp-clang.so +0 -0
warp/bin/warp.so +0 -0
warp/build.py +21 -2
warp/build_dll.py +23 -6
warp/builtins.py +1819 -7
warp/codegen.py +197 -61
warp/config.py +2 -2
warp/context.py +379 -107
warp/examples/assets/pixel.jpg +0 -0
warp/examples/benchmarks/benchmark_cloth_paddle.py +86 -0
warp/examples/benchmarks/benchmark_gemm.py +121 -0
warp/examples/benchmarks/benchmark_interop_paddle.py +158 -0
warp/examples/benchmarks/benchmark_tile.py +179 -0
warp/examples/fem/example_adaptive_grid.py +37 -10
warp/examples/fem/example_apic_fluid.py +3 -2
warp/examples/fem/example_convection_diffusion_dg.py +4 -5
warp/examples/fem/example_deformed_geometry.py +1 -1
warp/examples/fem/example_diffusion_3d.py +47 -4
warp/examples/fem/example_distortion_energy.py +220 -0
warp/examples/fem/example_magnetostatics.py +127 -85
warp/examples/fem/example_nonconforming_contact.py +5 -5
warp/examples/fem/example_stokes.py +3 -1
warp/examples/fem/example_streamlines.py +12 -19
warp/examples/fem/utils.py +38 -15
warp/examples/sim/example_cloth.py +4 -25
warp/examples/sim/example_quadruped.py +2 -1
warp/examples/tile/example_tile_convolution.py +58 -0
warp/examples/tile/example_tile_fft.py +47 -0
warp/examples/tile/example_tile_filtering.py +105 -0
warp/examples/tile/example_tile_matmul.py +79 -0
warp/examples/tile/example_tile_mlp.py +375 -0
warp/fem/__init__.py +8 -0
warp/fem/cache.py +16 -12
warp/fem/dirichlet.py +1 -1
warp/fem/domain.py +44 -1
warp/fem/field/__init__.py +1 -2
warp/fem/field/field.py +31 -19
warp/fem/field/nodal_field.py +101 -49
warp/fem/field/virtual.py +794 -0
warp/fem/geometry/__init__.py +2 -2
warp/fem/geometry/deformed_geometry.py +3 -105
warp/fem/geometry/element.py +13 -0
warp/fem/geometry/geometry.py +165 -7
warp/fem/geometry/grid_2d.py +3 -6
warp/fem/geometry/grid_3d.py +31 -28
warp/fem/geometry/hexmesh.py +3 -46
warp/fem/geometry/nanogrid.py +3 -2
warp/fem/geometry/{quadmesh_2d.py → quadmesh.py} +280 -159
warp/fem/geometry/tetmesh.py +2 -43
warp/fem/geometry/{trimesh_2d.py → trimesh.py} +354 -186
warp/fem/integrate.py +683 -261
warp/fem/linalg.py +404 -0
warp/fem/operator.py +101 -18
warp/fem/polynomial.py +5 -5
warp/fem/quadrature/quadrature.py +45 -21
warp/fem/space/__init__.py +45 -11
warp/fem/space/basis_function_space.py +451 -0
warp/fem/space/basis_space.py +58 -11
warp/fem/space/function_space.py +146 -5
warp/fem/space/grid_2d_function_space.py +80 -66
warp/fem/space/grid_3d_function_space.py +113 -68
warp/fem/space/hexmesh_function_space.py +96 -108
warp/fem/space/nanogrid_function_space.py +62 -110
warp/fem/space/quadmesh_function_space.py +208 -0
warp/fem/space/shape/__init__.py +45 -7
warp/fem/space/shape/cube_shape_function.py +328 -54
warp/fem/space/shape/shape_function.py +10 -1
warp/fem/space/shape/square_shape_function.py +328 -60
warp/fem/space/shape/tet_shape_function.py +269 -19
warp/fem/space/shape/triangle_shape_function.py +238 -19
warp/fem/space/tetmesh_function_space.py +69 -37
warp/fem/space/topology.py +38 -0
warp/fem/space/trimesh_function_space.py +179 -0
warp/fem/utils.py +6 -331
warp/jax_experimental.py +3 -1
warp/native/array.h +15 -0
warp/native/builtin.h +66 -26
warp/native/bvh.h +4 -0
warp/native/coloring.cpp +604 -0
warp/native/cuda_util.cpp +68 -51
warp/native/cuda_util.h +2 -1
warp/native/fabric.h +8 -0
warp/native/hashgrid.h +4 -0
warp/native/marching.cu +8 -0
warp/native/mat.h +14 -3
warp/native/mathdx.cpp +59 -0
warp/native/mesh.h +4 -0
warp/native/range.h +13 -1
warp/native/reduce.cpp +9 -1
warp/native/reduce.cu +7 -0
warp/native/runlength_encode.cpp +9 -1
warp/native/runlength_encode.cu +7 -1
warp/native/scan.cpp +8 -0
warp/native/scan.cu +8 -0
warp/native/scan.h +8 -1
warp/native/sparse.cpp +8 -0
warp/native/sparse.cu +8 -0
warp/native/temp_buffer.h +7 -0
warp/native/tile.h +1854 -0
warp/native/tile_gemm.h +341 -0
warp/native/tile_reduce.h +210 -0
warp/native/volume_builder.cu +8 -0
warp/native/volume_builder.h +8 -0
warp/native/warp.cpp +10 -2
warp/native/warp.cu +369 -15
warp/native/warp.h +12 -2
warp/optim/adam.py +39 -4
warp/paddle.py +29 -12
warp/render/render_opengl.py +140 -67
warp/sim/graph_coloring.py +292 -0
warp/sim/import_urdf.py +8 -8
warp/sim/integrator_euler.py +4 -2
warp/sim/integrator_featherstone.py +115 -44
warp/sim/integrator_vbd.py +6 -0
warp/sim/model.py +109 -32
warp/sparse.py +1 -1
warp/stubs.py +569 -4
warp/tape.py +12 -7
warp/tests/assets/pixel.npy +0 -0
warp/tests/aux_test_instancing_gc.py +18 -0
warp/tests/test_array.py +39 -0
warp/tests/test_codegen.py +81 -1
warp/tests/test_codegen_instancing.py +30 -0
warp/tests/test_collision.py +110 -0
warp/tests/test_coloring.py +251 -0
warp/tests/test_context.py +34 -0
warp/tests/test_examples.py +21 -5
warp/tests/test_fem.py +453 -113
warp/tests/test_func.py +34 -4
warp/tests/test_generics.py +52 -0
warp/tests/test_iter.py +68 -0
warp/tests/test_lerp.py +13 -87
warp/tests/test_mat_scalar_ops.py +1 -1
warp/tests/test_matmul.py +6 -9
warp/tests/test_matmul_lite.py +6 -11
warp/tests/test_mesh_query_point.py +1 -1
warp/tests/test_module_hashing.py +23 -0
warp/tests/test_overwrite.py +45 -0
warp/tests/test_paddle.py +27 -87
warp/tests/test_print.py +56 -1
warp/tests/test_smoothstep.py +17 -83
warp/tests/test_spatial.py +1 -1
warp/tests/test_static.py +3 -3
warp/tests/test_tile.py +744 -0
warp/tests/test_tile_mathdx.py +144 -0
warp/tests/test_tile_mlp.py +383 -0
warp/tests/test_tile_reduce.py +374 -0
warp/tests/test_tile_shared_memory.py +190 -0
warp/tests/test_vbd.py +12 -20
warp/tests/test_volume.py +43 -0
warp/tests/unittest_suites.py +19 -2
warp/tests/unittest_utils.py +4 -2
warp/types.py +340 -74
warp/utils.py +23 -3
{warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/METADATA +32 -7
{warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/RECORD +161 -134
{warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/WHEEL +1 -1
warp/fem/field/test.py +0 -180
warp/fem/field/trial.py +0 -183
warp/fem/space/collocated_function_space.py +0 -102
warp/fem/space/quadmesh_2d_function_space.py +0 -261
warp/fem/space/trimesh_2d_function_space.py +0 -153
{warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/LICENSE.md +0 -0
{warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/top_level.txt +0 -0

warp/native/bvh.h CHANGED Viewed

@@ -404,6 +404,10 @@ CUDA_CALLABLE inline bvh_query_t iter_reverse(const bvh_query_t& query)
     return query;
 }
+// Counterpart of iter_reverse() carrying the adj_ prefix (presumably the adjoint used by
+// generated backward code -- confirm). The body is intentionally empty: it is a no-op and
+// leaves adj_query and adj_ret untouched.
+CUDA_CALLABLE inline void adj_iter_reverse(const bvh_query_t& query, bvh_query_t& adj_query, bvh_query_t& adj_ret)
+{
+}
 // stub
 CUDA_CALLABLE inline void adj_bvh_query_next(bvh_query_t& query, int& index, bvh_query_t&, int&, bool&)

warp/native/coloring.cpp ADDED Viewed

@@ -0,0 +1,604 @@
+/** Copyright (c) 2024 NVIDIA CORPORATION.  All rights reserved.
+ * NVIDIA CORPORATION and its licensors retain all intellectual property
+ * and proprietary rights in and to this software, related documentation
+ * and any modifications thereto.  Any use, reproduction, disclosure or
+ * distribution of this software and related documentation without an express
+ * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ */
+ // The Apache 2 License
+// Copyright 2023 Anka He Chen
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the
+// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+// either express or implied. See the License for the specific language governing permissions
+// and limitations under the License.
+//
+// Source: https://github.com/AnkaChan/Gaia/blob/main/Simulator/Modules/GraphColoring/ColoringAlgorithms.cpp
+//         https://github.com/AnkaChan/Gaia/blob/main/Simulator/Modules/GraphColoring/ColoringAlgorithms.h
+//         https://github.com/AnkaChan/Gaia/blob/main/Simulator/Modules/GraphColoring/Graph.h
+#include "warp.h"
+#include <iostream>
+#include <vector>
+#include <array>
+#include <queue>
+#include <queue>
+#include <unordered_set>
+#include <random>
+#include <algorithm>
+#include <numeric>
+#define SHRINK_GRAPH_PER_PERCENTAGE (5)
+#define NODE_WEIGHTS_PREALLOC (64)
+#define WEIGHT_BUCKET_PREALLOC (512)
+namespace wp
+{
+    // Undirected graph stored as a flattened adjacency list, plus one color slot per node.
+    struct Graph
+    {
+        // Builds the adjacency list from `edges`, read as rows of two node indices
+        // (columns 0 and 1). Every node color starts at -1 (uncolored).
+        // NOTE(review): edge endpoints are used as indices without any range check --
+        // presumably callers guarantee 0 <= index < num_nodes_in; confirm.
+        Graph(int num_nodes_in, const wp::array_t<int>& edges)
+            : num_nodes(num_nodes_in)
+        {
+            node_offsets.resize(num_nodes + 1, 0);
+            node_colors.resize(num_nodes, -1);
+            std::vector<int> node_degrees(num_nodes, 0);
+            // count degrees: every edge adds one to each of its two endpoints
+            for (size_t edge_idx = 0; edge_idx < edges.shape[0]; edge_idx++)
+            {
+                int e0 = *address(edges, edge_idx, 0);
+                int e1 = *address(edges, edge_idx, 1);
+                node_degrees[e0] += 1;
+                node_degrees[e1] += 1;
+            }
+            // running sum of the degrees: node_offsets[n] is where node n's neighbors start
+            int offset = 0;
+            for (int node = 0; node < num_nodes; node++)
+            {
+                offset += node_degrees[node];
+                node_offsets[node + 1] = offset;
+            }
+            // fill adjacency list
+            std::vector<int> node_adjacency_fill_count(num_nodes, 0);
+            graph_flatten.resize(offset, -1);
+            for (size_t edge_idx = 0; edge_idx < edges.shape[0]; edge_idx++)
+            {
+                int e0 = *address(edges, edge_idx, 0);
+                int e1 = *address(edges, edge_idx, 1);
+                // Bump each counter right after its own write. Reading both counters up front
+                // made a self-loop (e0 == e1) write the same slot twice, leaving a -1 sentinel
+                // in the adjacency list that was later used as a node index.
+                graph_flatten[node_offsets[e0] + node_adjacency_fill_count[e0]++] = e1;
+                graph_flatten[node_offsets[e1] + node_adjacency_fill_count[e1]++] = e0;
+            }
+        }
+        // Returns the neighbor_index-th neighbor of `node` (no bounds checking).
+        int get_node_neighbor(int node, int neighbor_index) const {
+            return graph_flatten[node_offsets[node] + neighbor_index];
+        }
+        // Returns how many adjacency entries `node` owns.
+        int get_node_degree(int node) const {
+            return node_offsets[node + 1] - node_offsets[node];
+        }
+        int num_nodes;
+        // neighbors of all nodes, concatenated in node order
+        std::vector<int> graph_flatten;
+        // num_nodes + 1 entries; node n owns graph_flatten[node_offsets[n] .. node_offsets[n + 1])
+        std::vector<int> node_offsets;
+        // color per node, -1 while uncolored
+        std::vector<int> node_colors;
+    };
+// Groups node indices by color: color_groups[c] receives every node whose color is c.
+//
+// color_groups is resized to num_colors; entries it already holds are kept and appended to.
+// Nodes whose color falls outside [0, num_colors) -- e.g. -1 for "uncolored" -- are skipped
+// instead of indexing color_groups out of bounds.
+void convert_to_color_groups(const int num_colors, const std::vector<int>& node_colors, std::vector<std::vector<int>>& color_groups)
+{
+    color_groups.resize(num_colors);
+    const int num_nodes = static_cast<int>(node_colors.size());
+    for (int node_idx = 0; node_idx < num_nodes; node_idx++) {
+        const int color = node_colors[node_idx];
+        if (color < 0 || color >= num_colors)
+        {
+            continue;
+        }
+        color_groups[color].push_back(node_idx);
+    }
+}
+// Locates the largest and the smallest color group and returns size(largest) / size(smallest).
+//
+// biggest_group / smallest_group receive the group indices; ties go to the lowest index.
+// With no groups at all both indices are set to -1 and 1 is returned.
+// The division is done in float, so an empty smallest group gives a non-finite ratio.
+float find_largest_smallest_groups(const std::vector<std::vector<int>>& color_groups, int& biggest_group, int& smallest_group)
+{
+    if (color_groups.empty())
+    {
+        biggest_group = smallest_group = -1;
+        return 1;
+    }
+    biggest_group = 0;
+    smallest_group = 0;
+    size_t largest = color_groups[0].size();
+    size_t smallest = largest;
+    for (size_t group = 0; group < color_groups.size(); ++group)
+    {
+        const size_t group_size = color_groups[group].size();
+        // strict comparisons keep the first group among equals
+        if (group_size > largest)
+        {
+            largest = group_size;
+            biggest_group = group;
+        }
+        if (group_size < smallest)
+        {
+            smallest = group_size;
+            smallest_group = group;
+        }
+    }
+    const float numerator = float(color_groups[biggest_group].size());
+    const float denominator = float(color_groups[smallest_group].size());
+    return numerator / denominator;
+}
+// Returns true when no neighbor of `node` currently has `target_color`,
+// i.e. `node` could take that color without clashing with an adjacent node.
+bool color_changeable(const Graph& graph, int node, int target_color){
+    size_t slot = 0;
+    while (slot < graph.get_node_degree(node))
+    {
+        const int neighbor = graph.get_node_neighbor(node, slot);
+        if (graph.node_colors[neighbor] == target_color)
+        {
+            return false;
+        }
+        ++slot;
+    }
+    return true;
+}
+// Scans group `source_color` in order for a node that can be recolored to `target_color`.
+// Returns that node's position inside the group (not the node index), or -1 if there is none.
+int find_changeable_node_in_category(
+    const Graph& graph,
+    const std::vector<std::vector<int>>& color_groups,
+    int source_color,
+    int target_color
+)
+{
+    const std::vector<int>& candidates = color_groups[source_color];
+    const auto hit = std::find_if(candidates.begin(), candidates.end(),
+        [&graph, target_color](int node)
+        {
+            return color_changeable(graph, node, target_color);
+        }
+    );
+    if (hit == candidates.end())
+    {
+        return -1;
+    }
+    return static_cast<int>(hit - candidates.begin());
+}
+// Recolors the node stored at color_groups[color][node_idx_in_group] to target_color,
+// updating both the per-node color table and the group membership lists.
+void change_color(int color, int node_idx_in_group, int target_color, std::vector<int>& node_colors, std::vector<std::vector<int>>& color_groups)
+{
+    const int moved_node = color_groups[color][node_idx_in_group];
+    node_colors[moved_node] = target_color;
+    if (color_groups.empty())
+    {
+        return;
+    }
+    // constant-time removal: the last element takes over the vacated slot
+    std::vector<int>& from_group = color_groups[color];
+    from_group[node_idx_in_group] = from_group.back();
+    from_group.pop_back();
+    color_groups[target_color].push_back(moved_node);
+}
+// Evens out the color group sizes by repeatedly moving one node into the currently smallest group.
+//
+// target_max_min_ratio: stop once size(largest) / size(smallest) drops below this value.
+// graph:                supplies adjacency for the conflict check; graph.node_colors is updated.
+// color_groups:         node indices per color; updated in step with graph.node_colors.
+//
+// Returns the last ratio that was measured. It is measured at the top of an iteration,
+// i.e. before the move performed in that same iteration.
+// NOTE(review): color_groups is indexed with the result of find_largest_smallest_groups(), which
+// is -1 for an empty color_groups -- presumably callers only pass a non-empty one; confirm.
+// NOTE(review): there is no cap on the number of iterations; termination appears to rely on the
+// exit checks below eventually firing -- confirm.
+float balance_color_groups(float target_max_min_ratio,
+    Graph& graph,
+    std::vector<std::vector<int>>& color_groups)
+{
+    float max_min_ratio = -1.f;
+    do
+    {
+        int biggest_group = -1, smallest_group = -1;
+        max_min_ratio = find_largest_smallest_groups(color_groups, biggest_group, smallest_group);
+        // graph is not optimizable anymore or target ratio reached
+        if (color_groups[biggest_group].size() - color_groups[smallest_group].size() <= 2
+            || max_min_ratio < target_max_min_ratio)
+        {
+            return max_min_ratio;
+        }
+        // find a available vertex from the biggest category to move to the smallest category
+        int changeable_color_group_idx = biggest_group;
+        int changeable_node_idx = find_changeable_node_in_category(graph, color_groups, biggest_group, smallest_group);
+        // nothing movable in the biggest group: try every other group, in index order,
+        // and take the first node that can legally join the smallest group
+        if (changeable_node_idx == -1)
+        {
+            for (size_t color = 0; color < color_groups.size(); color++)
+            {
+                if (color == biggest_group || color == smallest_group)
+                {
+                    continue;
+                }
+                changeable_node_idx = find_changeable_node_in_category(graph, color_groups, color, smallest_group);
+                if (changeable_node_idx != -1)
+                {
+                    changeable_color_group_idx = color;
+                    break;
+                }
+            }
+        }
+        // no group has a node that can move into the smallest group: give up
+        if (changeable_node_idx == -1)
+        {
+            // fprintf(stderr, "The graph is not optimizable anymore, terminated with a max/min ratio: %f without reaching the target ratio: %f\n", max_min_ratio, target_max_min_ratio);
+            return max_min_ratio;
+        }
+        // move the node at position changeable_node_idx of group changeable_color_group_idx
+        // into the smallest group
+        change_color(changeable_color_group_idx, changeable_node_idx, smallest_group, graph.node_colors, color_groups);
+    } while (max_min_ratio > target_max_min_ratio);
+    return max_min_ratio;
+}
+// Greedy coloring: visits nodes in the given `order` and gives each one the lowest color
+// not already taken by a colored neighbor, opening a new color only when all are taken.
+// Colors are written to graph.node_colors; the number of colors used is returned.
+int graph_coloring_ordered_greedy(const std::vector<int>& order, Graph& graph)
+{
+    int highest_color = -1;
+    std::vector<bool> taken;
+    taken.reserve(128);
+    for (const int node : order)
+    {
+        // the very first node visited always opens color 0
+        if (highest_color == -1)
+        {
+            highest_color = 0;
+            graph.node_colors[node] = highest_color;
+            continue;
+        }
+        // one flag per color opened so far, all cleared
+        taken.assign(highest_color + 1, false);
+        // flag the colors of the neighbors that are already colored
+        for (int slot = 0; slot < graph.get_node_degree(node); slot++)
+        {
+            const int neighbor_color = graph.node_colors[graph.get_node_neighbor(node, slot)];
+            if (neighbor_color >= 0)
+            {
+                taken[neighbor_color] = true;
+            }
+        }
+        // lowest color no neighbor uses, if any
+        int chosen = -1;
+        for (int candidate = 0; candidate < taken.size(); candidate++)
+        {
+            if (!taken[candidate])
+            {
+                chosen = candidate;
+                break;
+            }
+        }
+        // every existing color is taken: open a new one
+        if (chosen == -1)
+        {
+            chosen = ++highest_color;
+        }
+        graph.node_colors[node] = chosen;
+    }
+    return (highest_color + 1);
+}
+// Bucket queue of nodes keyed by an integer weight, with O(1) insertion and weight increment.
+//
+// weight_buckets[w] lists the nodes that currently have weight w; node_indices_in_bucket[n] is
+// the position of node n inside its bucket (-1 when it is in none). A node's weight is 0 until
+// it is added and becomes -1 once it has been popped.
+class NodeWeightBuckets
+{
+public:
+    // Prepares bookkeeping for `num_nodes` nodes: all weights 0, no node in any bucket.
+    // Initializers are listed in member declaration order.
+    NodeWeightBuckets(int num_nodes)
+        : max_weight(0), node_indices_in_bucket(num_nodes, -1), node_weights(num_nodes, 0)
+    {
+        weight_buckets.resize(NODE_WEIGHTS_PREALLOC);
+        // capacity is reserved for buckets 1 and up only; bucket 0 is left as is
+        for (size_t i = 1; i < weight_buckets.size(); i++)
+        {
+            weight_buckets[i].reserve(WEIGHT_BUCKET_PREALLOC);
+        }
+    }
+    // Returns the stored weight of `node_idx` (-1 once the node has been popped).
+    int get_node_weight(int node_idx)
+    {
+        return node_weights[node_idx];
+    }
+    // Puts `node_idx` into the bucket for `weight`, growing the bucket table if needed,
+    // and raises max_weight when `weight` exceeds it.
+    void add_node(int weight, int node_idx)
+    {
+        if (weight >= weight_buckets.size())
+        {
+            weight_buckets.resize(weight + 1);
+        }
+        node_indices_in_bucket[node_idx] = weight_buckets[weight].size();
+        node_weights[node_idx] = weight;
+        weight_buckets[weight].push_back(node_idx);
+        if (max_weight < weight)
+        {
+            max_weight = weight;
+        }
+    }
+    // Removes and returns the front node of the highest non-empty bucket and marks it
+    // deleted (weight -1). The bucket at max_weight must not be empty.
+    int pop_node_with_max_weight() {
+        std::vector<int>& top_bucket = weight_buckets[max_weight];
+        assert(!top_bucket.empty());
+        int node_with_max_weight = top_bucket.front();
+        node_indices_in_bucket[node_with_max_weight] = -1;
+        // we pop the first element so it has a breadth-first like behavior, which is better than depth-first
+        if (top_bucket.size() > 1)
+        {
+            // the last element takes over slot 0
+            node_indices_in_bucket[top_bucket.back()] = 0;
+            top_bucket[0] = top_bucket.back();
+        }
+        top_bucket.pop_back();
+        // mark node deleted
+        node_weights[node_with_max_weight] = -1;
+        if (top_bucket.empty())
+        {
+            // weight_buckets[max_weight] became empty: walk down to the next non-empty bucket.
+            // The index must be signed. With size_t the test `bucket_idx >= 0` never fails and
+            // `max_weight - 1` wraps to SIZE_MAX when max_weight is 0, reading out of bounds.
+            int new_max_weight = 0;
+            for (int bucket_idx = max_weight - 1; bucket_idx >= 0; bucket_idx--)
+            {
+                if (!weight_buckets[bucket_idx].empty())
+                {
+                    new_max_weight = bucket_idx;
+                    break;
+                }
+            }
+            max_weight = new_max_weight;
+        }
+        return node_with_max_weight;
+    }
+    // Moves `node_idx`, which must currently sit in a bucket, to the next higher bucket.
+    void increase_node_weight(int node_idx)
+    {
+        int weight = node_weights[node_idx];
+        assert(weight < weight_buckets.size());
+        int node_idx_in_bucket = node_indices_in_bucket[node_idx];
+        assert(node_idx_in_bucket < weight_buckets[weight].size());
+        // swap index with the last element
+        node_indices_in_bucket[weight_buckets[weight].back()] = node_idx_in_bucket;
+        // O(1) erase
+        weight_buckets[weight][node_idx_in_bucket] = weight_buckets[weight].back();
+        weight_buckets[weight].pop_back();
+        // add_node() rewrites this node's weight and bucket position
+        add_node(weight + 1, node_idx);
+    }
+    // True when max_weight is not positive and bucket 0 holds no node.
+    bool empty()
+    {
+        return max_weight <= 0 && weight_buckets[0].size() == 0;
+    }
+private:
+    // highest weight that may have a non-empty bucket
+    int max_weight;
+    // weight_buckets[w]: nodes whose weight is w
+    std::vector<std::vector<int>> weight_buckets;
+    // position of each node inside its bucket, -1 when in none
+    std::vector<int> node_indices_in_bucket;
+    // weight per node, -1 once popped
+    std::vector<int> node_weights;
+};
+// Pereira, F. M. Q., & Palsberg, J. (2005, November). Register allocation via coloring of chordal graphs. In Asian Symposium on Programming Languages and Systems (pp. 315-329). Berlin, Heidelberg: Springer Berlin Heidelberg.
+// Colors `graph` greedily along a maximum-cardinality-search ordering: the next node is always
+// one with the most already-ordered neighbors (its weight). Colors are written to
+// graph.node_colors; the number of colors used is returned (0 for a graph without nodes).
+int graph_coloring_mcs_vector(Graph& graph)
+{
+    // nothing to order; also keeps the seeding add_node(0, 0) below away from empty tables
+    if (graph.num_nodes <= 0)
+    {
+        return 0;
+    }
+    // Initially the weight of each node is 0
+    std::vector<int> ordering;
+    ordering.reserve(graph.num_nodes);
+    NodeWeightBuckets weight_buckets(graph.num_nodes);
+    // add the first node
+    weight_buckets.add_node(0, 0);
+    // Lowest index that may still be unvisited. A visited node keeps weight -1 for good, so this
+    // cursor only ever moves forward and all restarts together cost O(N), instead of rescanning
+    // from index 0 for every connected component.
+    int restart_cursor = 0;
+    for (int step = 0; step < graph.num_nodes; step++)
+    {
+        // the buckets ran dry: the current connected component is done, restart from the
+        // lowest-index node that has not been visited yet
+        if (weight_buckets.empty())
+        {
+            while (restart_cursor < graph.num_nodes && weight_buckets.get_node_weight(restart_cursor) < 0)
+            {
+                restart_cursor++;
+            }
+            assert(restart_cursor < graph.num_nodes);
+            assert(weight_buckets.get_node_weight(restart_cursor) == 0);
+            weight_buckets.add_node(0, restart_cursor);
+        }
+        int max_node = weight_buckets.pop_node_with_max_weight();
+        // Append the highest weight node to the ordering and increment all of its neighbors weights by 1
+        ordering.push_back(max_node);
+        const int degree = graph.get_node_degree(max_node);
+        for (int j = 0; j < degree; j++) {
+            int neighbor_node = graph.get_node_neighbor(max_node, j);
+            int old_weight = weight_buckets.get_node_weight(neighbor_node);
+            if (old_weight == 0)
+            {
+                // 0-weighted node is not in buckets by default
+                weight_buckets.add_node(old_weight + 1, neighbor_node);
+            }
+            else if (old_weight > 0) {
+                weight_buckets.increase_node_weight(neighbor_node);
+            }
+            // skip neighbor nodes with negative weight because they are visited
+        }
+    }
+    return graph_coloring_ordered_greedy(ordering, graph);
+}
+// Returns the index of the entry with the smallest degree, ignoring entries equal to -1.
+// Ties go to the lowest index. Only degrees below num_nodes + 1 are considered;
+// -1 is returned when nothing qualifies.
+int next_node(const int num_nodes, const std::vector<int>& degrees)
+{
+    int best_node = -1;
+    int best_degree = num_nodes + 1;
+    for (size_t candidate = 0; candidate < degrees.size(); ++candidate)
+    {
+        const int degree = degrees[candidate];
+        // -1 marks an entry to ignore; otherwise keep only strict improvements
+        if (degree != -1 && degree < best_degree)
+        {
+            best_degree = degree;
+            best_node = candidate;
+        }
+    }
+    return best_node;
+}
+// Marks `node_idx` as removed (degree -1) and lowers by one the degree of each of its
+// neighbors that has not been removed itself.
+void reduce_degree(int node_idx, Graph& graph, std::vector<int>& degrees)
+{
+    degrees[node_idx] = -1;
+    size_t slot = 0;
+    while (slot < graph.get_node_degree(node_idx))
+    {
+        int& neighbor_degree = degrees[graph.get_node_neighbor(node_idx, slot)];
+        if (neighbor_degree != -1)
+        {
+            --neighbor_degree;
+        }
+        ++slot;
+    }
+}
+// Fratarcangeli, Marco, and Fabio Pellacini. "Scalable partitioning for parallel position based dynamics." Computer Graphics Forum. Vol. 34. No. 2. 2015.
+// Greedy coloring that visits nodes from highest to lowest degree.
+// Returns the number of colors used; colors are written to graph.node_colors.
+int graph_coloring_degree_ordered_greedy(Graph& graph)
+{
+    const int node_count = graph.num_nodes;
+    // degree of every node, and the identity ordering 0..node_count-1
+    std::vector<int> degrees(node_count, 0);
+    std::vector<int> ordering(node_count);
+    for (int node = 0; node < node_count; ++node)
+    {
+        degrees[node] = graph.get_node_degree(node);
+        ordering[node] = node;
+    }
+    // highest degree first
+    const auto higher_degree_first = [&degrees](int lhs, int rhs)
+    {
+        return degrees[lhs] > degrees[rhs];
+    };
+    std::sort(ordering.begin(), ordering.end(), higher_degree_first);
+    return graph_coloring_ordered_greedy(ordering, graph);
+}
+// Greedy coloring that visits nodes in plain index order 0, 1, ..., num_nodes - 1.
+// Returns the number of colors used; colors are written to graph.node_colors.
+int graph_coloring_naive_greedy(Graph& graph)
+{
+    std::vector<int> index_order;
+    index_order.reserve(graph.num_nodes);
+    for (int node = 0; node < graph.num_nodes; ++node)
+    {
+        index_order.push_back(node);
+    }
+    return graph_coloring_ordered_greedy(index_order, graph);
+}
+}
+using namespace wp;
+extern "C"
+{
+    // C entry point: colors the graph described by `edges` and writes one color per node
+    // into `node_colors`.
+    //
+    // num_nodes:   number of nodes; must be non-zero
+    // edges:       2-dimensional array, one edge per row (node indices in columns 0 and 1)
+    // algorithm:   0 = MCS ordering, 1 = degree-ordered greedy; any other value is rejected
+    // node_colors: preallocated 1-dimensional array with num_nodes entries
+    //
+    // Returns the number of colors used, or -1 after printing a message to stderr when an
+    // argument is rejected.
+    int graph_coloring(int num_nodes, wp::array_t<int> edges, int algorithm, wp::array_t<int> node_colors)
+    {
+        const bool colors_shape_ok = node_colors.ndim == 1 && node_colors.shape[0] == num_nodes;
+        if (!colors_shape_ok)
+        {
+            fprintf(stderr, "The node_colors array must have the preallocated shape of (num_nodes,)!\n");
+            return -1;
+        }
+        if (edges.ndim != 2)
+        {
+            fprintf(stderr, "The edges array must have 2 dimensions!\n");
+            return -1;
+        }
+        if (num_nodes == 0)
+        {
+            fprintf(stderr, "Empty graph!\n");
+            return -1;
+        }
+        // build the adjacency structure the coloring routines work on
+        Graph graph(num_nodes, edges);
+        int num_colors = -1;
+        if (algorithm == 0)
+        {
+            // maximum cardinality search ordering
+            num_colors = graph_coloring_mcs_vector(graph);
+        }
+        else if (algorithm == 1)
+        {
+            // degree-ordered greedy
+            num_colors = graph_coloring_degree_ordered_greedy(graph);
+        }
+        else
+        {
+            // 2 (set-based MCS) and 3 (naive greedy) are not wired up
+            fprintf(stderr, "Unrecognized coloring algorithm number: %d!\n", algorithm);
+            return -1;
+        }
+        // hand the per-node colors back to the caller's array
+        memcpy(node_colors.data, graph.node_colors.data(), num_nodes * sizeof(int));
+        return num_colors;
+    }
+    // C entry point: rebalances an existing coloring so the color groups get closer in size,
+    // and writes the result back into `node_colors`.
+    //
+    // num_nodes:            number of nodes; also the number of ints copied from/to node_colors
+    // edges:                one edge per row (node indices in columns 0 and 1)
+    // num_colors:           number of colors in the incoming coloring
+    // target_max_min_ratio: largest/smallest group size ratio to aim for
+    // node_colors:          in/out, one color per node
+    //
+    // Returns the ratio reported by balance_color_groups(), or 1 when num_colors is 1 or
+    // less (node_colors is then left untouched).
+    // NOTE(review): unlike graph_coloring(), nothing here validates the array arguments --
+    // presumably the caller has already done so; confirm.
+    float balance_coloring(int num_nodes, wp::array_t<int> edges, int num_colors,
+        float target_max_min_ratio, wp::array_t<int> node_colors)
+    {
+        Graph graph(num_nodes, edges);
+        const size_t color_bytes = num_nodes * sizeof(int);
+        // load the caller's coloring into the graph
+        memcpy(graph.node_colors.data(), node_colors.data, color_bytes);
+        // a single color (or none) leaves nothing to balance
+        if (num_colors <= 1)
+        {
+            return 1.f;
+        }
+        std::vector<std::vector<int>> color_groups;
+        convert_to_color_groups(num_colors, graph.node_colors, color_groups);
+        const float max_min_ratio = balance_color_groups(target_max_min_ratio, graph, color_groups);
+        // publish the rebalanced colors
+        memcpy(node_colors.data, graph.node_colors.data(), color_bytes);
+        return max_min_ratio;
+    }
+}