PyPI - xmos-ai-tools - Versions diffs - 1.3.2.dev80__py3-none-macosx_10_15_universal2.whl - Mend

xmos-ai-tools 1.3.2.dev80__py3-none-macosx_10_15_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (395) hide show

xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/arg_min_max.h ADDED Viewed

@@ -0,0 +1,88 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_
+#include <functional>
+#include "tensorflow/lite/kernels/internal/types.h"
+namespace tflite_micro {
+namespace reference_ops {
+// Picks the ordering predicate for an arg-min / arg-max search:
+// std::greater<T> when `is_arg_max` is true, std::less<T> otherwise. The
+// result is type-erased so both choices share one return type.
+template <typename T>
+std::function<bool(T, T)> GetComparefunction(bool is_arg_max) {
+  using Comparator = std::function<bool(T, T)>;
+  return is_arg_max ? Comparator(std::greater<T>())
+                    : Comparator(std::less<T>());
+}
+// For every position outside the reduced axis, writes the index along that
+// axis of the element preferred by `cmp`.
+//
+// input1_shape / input1_data: the tensor to search.
+// input2_data: input2_data[0] is the axis; a negative value has the input rank
+//   added to it.
+// output_shape / output_data: receives one index per (outer, inner) position;
+//   its dims must equal the input dims with the axis removed (debug-checked).
+// cmp: cmp(candidate, best) == true makes `candidate` the new best. Index 0 is
+//   the initial best, so it is kept whenever cmp never returns true.
+template <typename T1, typename T2, typename T3, typename Cmp>
+void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
+               const T3* input2_data, const RuntimeShape& output_shape,
+               T2* output_data, const Cmp& cmp) {
+  const int rank = input1_shape.DimensionsCount();
+  TFLITE_DCHECK_GT(rank, 0);
+  TFLITE_DCHECK_EQ(rank - 1, output_shape.DimensionsCount());
+  const int requested_axis = input2_data[0];
+  const int axis = requested_axis < 0 ? requested_axis + rank : requested_axis;
+  const int axis_size = input1_shape.Dims(axis);
+  // Dimensions before the axis collapse into `outer_size`.
+  int outer_size = 1;
+  for (int d = 0; d < axis; ++d) {
+    TFLITE_DCHECK_EQ(input1_shape.Dims(d), output_shape.Dims(d));
+    outer_size *= input1_shape.Dims(d);
+  }
+  // Dimensions after the axis collapse into `inner_size`; the output has the
+  // axis removed, hence the `d - 1` on the output side.
+  int inner_size = 1;
+  for (int d = axis + 1; d < rank; ++d) {
+    TFLITE_DCHECK_EQ(input1_shape.Dims(d), output_shape.Dims(d - 1));
+    inner_size *= input1_shape.Dims(d);
+  }
+  for (int outer = 0; outer < outer_size; ++outer) {
+    const T1* slice = input1_data + outer * axis_size * inner_size;
+    for (int inner = 0; inner < inner_size; ++inner) {
+      // Consecutive elements along the axis are `inner_size` apart.
+      const T1* column = slice + inner;
+      T1 best_value = column[0];
+      T2 best_index = 0;
+      for (int pos = 1; pos < axis_size; ++pos) {
+        const T1& candidate = column[pos * inner_size];
+        if (cmp(candidate, best_value)) {
+          best_value = candidate;
+          best_index = static_cast<T2>(pos);
+        }
+      }
+      output_data[outer * inner_size + inner] = best_index;
+    }
+  }
+}
+// Convenience overload: obtains the comparator for `is_arg_max` from
+// GetComparefunction<T1> and forwards every other argument unchanged to the
+// comparator-based ArgMinMax.
+template <typename T1, typename T2, typename T3>
+void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
+               const T3* input2_data, const RuntimeShape& output_shape,
+               T2* output_data, const bool is_arg_max) {
+  const auto comparator = GetComparefunction<T1>(is_arg_max);
+  ArgMinMax(input1_shape, input1_data, input2_data, output_shape, output_data,
+            comparator);
+}
+}  // namespace reference_ops
+}  // namespace tflite_micro
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_ARG_MIN_MAX_H_

xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/batch_matmul.h ADDED Viewed

@@ -0,0 +1,275 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_
+#include <algorithm>
+#include <cstdint>
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+namespace tflite_micro {
+namespace reference_ops {
+namespace batch_matmul {
+// Returns the size a dimension has after broadcasting `lhs_dim` against
+// `rhs_dim`: the common size when they match, otherwise the size of the side
+// that is not 1. When the sizes differ and lhs is not 1, rhs is debug-checked
+// to be 1.
+inline int broadcast_dim(int lhs_dim, int rhs_dim) {
+  if (lhs_dim != rhs_dim && lhs_dim != 1) {
+    TFLITE_DCHECK_EQ(rhs_dim, 1);
+  }
+  return lhs_dim == 1 ? rhs_dim : lhs_dim;
+}
+// Returns the element step used when advancing along dimension `x` of
+// `shape`: the product of every dimension after `x`. A dimension of size 1 is
+// treated as broadcast, so its step is 0 and the data pointer stays put.
+inline int extent(const RuntimeShape& shape, int x) {
+  int step = 0;
+  if (shape.Dims(x) != 1) {
+    step = 1;
+    int d = x + 1;
+    while (d < shape.DimensionsCount()) {
+      step *= shape.Dims(d);
+      ++d;
+    }
+  }
+  return step;
+}
+}  // namespace batch_matmul
+// Batched matrix multiply over shapes extended to 5 dimensions, with the
+// three leading dimensions acting as broadcastable batch dimensions.
+//
+// For each batch and each (row, col) pair the result is
+//   sum_k Tout(lhs[row * depth + k]) * Tout(rhs[col * depth + k])
+// and is stored at out[rows * col + row], where rows = lhs dim 3,
+// cols = rhs dim 4 and depth = lhs dim 4. `output_shape` is not read.
+template <typename Ta, typename Tb, typename Tout>
+inline void BatchMatMul(const RuntimeShape& lhs_shape, const Ta* lhs_data,
+                        const RuntimeShape& rhs_shape, const Tb* rhs_data,
+                        const RuntimeShape& output_shape, Tout* output_data) {
+  const RuntimeShape lhs5d = RuntimeShape::ExtendedShape(5, lhs_shape);
+  const RuntimeShape rhs5d = RuntimeShape::ExtendedShape(5, rhs_shape);
+  // Broadcast-resolved sizes of the three batch dimensions.
+  const int n0 = batch_matmul::broadcast_dim(lhs5d.Dims(0), rhs5d.Dims(0));
+  const int n1 = batch_matmul::broadcast_dim(lhs5d.Dims(1), rhs5d.Dims(1));
+  const int n2 = batch_matmul::broadcast_dim(lhs5d.Dims(2), rhs5d.Dims(2));
+  // Per-batch-dimension pointer steps (0 where that side is broadcast).
+  const int lhs_step0 = batch_matmul::extent(lhs5d, 0);
+  const int lhs_step1 = batch_matmul::extent(lhs5d, 1);
+  const int lhs_step2 = batch_matmul::extent(lhs5d, 2);
+  const int rhs_step0 = batch_matmul::extent(rhs5d, 0);
+  const int rhs_step1 = batch_matmul::extent(rhs5d, 1);
+  const int rhs_step2 = batch_matmul::extent(rhs5d, 2);
+  // Sizes of a single matrix product.
+  const int rows = lhs5d.Dims(3);
+  const int cols = rhs5d.Dims(4);
+  const int depth = lhs5d.Dims(4);
+  for (int b0 = 0; b0 < n0; ++b0) {
+    for (int b1 = 0; b1 < n1; ++b1) {
+      for (int b2 = 0; b2 < n2; ++b2) {
+        const Ta* lhs_mat =
+            lhs_data + b0 * lhs_step0 + b1 * lhs_step1 + b2 * lhs_step2;
+        const Tb* rhs_mat =
+            rhs_data + b0 * rhs_step0 + b1 * rhs_step1 + b2 * rhs_step2;
+        // Output batches are laid out densely, one rows*cols matrix each.
+        const int batch_index = (b0 * n1 * n2) + b1 * n2 + b2;
+        Tout* out_mat = output_data + batch_index * rows * cols;
+        for (int col = 0; col < cols; ++col) {
+          const Tb* rhs_vec = rhs_mat + col * depth;
+          for (int row = 0; row < rows; ++row) {
+            const Ta* lhs_vec = lhs_mat + depth * row;
+            Tout acc = 0;
+            for (int k = 0; k < depth; ++k) {
+              acc += static_cast<Tout>(lhs_vec[k]) *
+                     static_cast<Tout>(rhs_vec[k]);
+            }
+            out_mat[rows * col + row] = acc;
+          }
+        }
+      }
+    }
+  }
+}
+// Hybrid batched matrix multiply: int8 operands, float output.
+//
+// scaling_factors / input_offset: indexed per rhs column (`[col]`) after the
+//   batch step has been applied.
+// row_sums: scratch buffer holding per-lhs-row sums; it is (re)filled through
+//   tensor_utils::ReductionSumVector when `compute_row_sums` is null or points
+//   to true, after which a non-null flag is reset to false.
+// output_data: results are ACCUMULATED with `+=`, so existing contents are
+//   added to rather than overwritten. `output_shape` is not read.
+inline void BatchMatMul(const RuntimeShape& lhs_shape, const int8_t* lhs_data,
+                        const RuntimeShape& rhs_shape, const int8_t* rhs_data,
+                        const float* scaling_factors,
+                        const int32_t* input_offset, int32_t* row_sums,
+                        const RuntimeShape& output_shape, float* output_data,
+                        bool* compute_row_sums) {
+  const RuntimeShape lhs5d = RuntimeShape::ExtendedShape(5, lhs_shape);
+  const RuntimeShape rhs5d = RuntimeShape::ExtendedShape(5, rhs_shape);
+  // Broadcast-resolved sizes of the three batch dimensions.
+  const int n0 = batch_matmul::broadcast_dim(lhs5d.Dims(0), rhs5d.Dims(0));
+  const int n1 = batch_matmul::broadcast_dim(lhs5d.Dims(1), rhs5d.Dims(1));
+  const int n2 = batch_matmul::broadcast_dim(lhs5d.Dims(2), rhs5d.Dims(2));
+  // Per-batch-dimension pointer steps (0 where that side is broadcast).
+  const int lhs_step0 = batch_matmul::extent(lhs5d, 0);
+  const int lhs_step1 = batch_matmul::extent(lhs5d, 1);
+  const int lhs_step2 = batch_matmul::extent(lhs5d, 2);
+  const int rhs_step0 = batch_matmul::extent(rhs5d, 0);
+  const int rhs_step1 = batch_matmul::extent(rhs5d, 1);
+  const int rhs_step2 = batch_matmul::extent(rhs5d, 2);
+  // Sizes of a single matrix product.
+  const int rows = lhs5d.Dims(3);
+  const int cols = rhs5d.Dims(4);
+  const int depth = lhs5d.Dims(4);
+  // Steps for the per-column quantisation data follow the rhs broadcast
+  // pattern; steps for the row sums follow the lhs broadcast pattern.
+  // NOTE(review): every batch level uses the same step (cols / rows) - confirm
+  // this matches the layout the caller provides.
+  const int quant_step0 = (rhs_step0 == 0) ? 0 : cols;
+  const int quant_step1 = (rhs_step1 == 0) ? 0 : cols;
+  const int quant_step2 = (rhs_step2 == 0) ? 0 : cols;
+  const int sums_step0 = (lhs_step0 == 0) ? 0 : rows;
+  const int sums_step1 = (lhs_step1 == 0) ? 0 : rows;
+  const int sums_step2 = (lhs_step2 == 0) ? 0 : rows;
+  const bool refresh_row_sums =
+      (compute_row_sums == nullptr) || *compute_row_sums;
+  if (refresh_row_sums) {
+    // NOTE(review): the product starts at dim 1, so dim 0 is not counted -
+    // confirm this is intended.
+    int num_weights_matrices = 1;
+    const int batch_dims_end = lhs5d.DimensionsCount() - 2;
+    for (int d = 1; d < batch_dims_end; ++d) {
+      num_weights_matrices *= lhs5d.Dims(d);
+    }
+    tensor_utils::ReductionSumVector(lhs_data, row_sums,
+                                     num_weights_matrices * rows, depth);
+    if (compute_row_sums != nullptr) {
+      *compute_row_sums = false;
+    }
+  }
+  for (int b0 = 0; b0 < n0; ++b0) {
+    for (int b1 = 0; b1 < n1; ++b1) {
+      for (int b2 = 0; b2 < n2; ++b2) {
+        const int8_t* lhs_mat =
+            lhs_data + b0 * lhs_step0 + b1 * lhs_step1 + b2 * lhs_step2;
+        const int8_t* rhs_mat =
+            rhs_data + b0 * rhs_step0 + b1 * rhs_step1 + b2 * rhs_step2;
+        const int32_t* offsets = input_offset + b0 * quant_step0 +
+                                 b1 * quant_step1 + b2 * quant_step2;
+        const float* scales = scaling_factors + b0 * quant_step0 +
+                              b1 * quant_step1 + b2 * quant_step2;
+        const int32_t* sums =
+            row_sums + b0 * sums_step0 + b1 * sums_step1 + b2 * sums_step2;
+        const int batch_index = (b0 * n1 * n2) + b1 * n2 + b2;
+        float* out_mat = output_data + batch_index * rows * cols;
+        for (int col = 0; col < cols; ++col) {
+          const float batch_scaling_factor = scales[col];
+          const float batch_offset = static_cast<float>(offsets[col]);
+          const int8_t* rhs_vec = rhs_mat + col * depth;
+          for (int row = 0; row < rows; ++row) {
+            const int8_t* lhs_vec = lhs_mat + depth * row;
+            int32_t total = 0;
+            for (int k = 0; k < depth; ++k) {
+              total += lhs_vec[k] * rhs_vec[k];
+            }
+            const int32_t row_sum = sums[row];
+            // The subtraction is evaluated in float and converted back to
+            // int32_t by the compound assignment; kept exactly as is.
+            total -= row_sum * batch_offset;
+            out_mat[rows * col + row] += batch_scaling_factor * total;
+          }
+        }
+      }
+    }
+  }
+}
+// Fully quantized batched matrix multiply over shapes extended to 5
+// dimensions.
+//
+// For each output element the accumulator (type AccumT) sums
+//   (lhs + params.weights_offset) * (rhs + params.input_offset)
+// over the depth dimension. The sum is rescaled with
+// MultiplyByQuantizedMultiplier(params.output_multiplier,
+// params.output_shift), shifted by params.output_offset, clamped to
+// [quantized_activation_min, quantized_activation_max] and cast to T.
+// `output_shape` is not read.
+template <typename T, typename AccumT>
+inline void BatchMatMul(const FullyConnectedParams& params,
+                        const RuntimeShape& lhs_shape, const T* lhs_data,
+                        const RuntimeShape& rhs_shape, const T* rhs_data,
+                        const RuntimeShape& output_shape, T* output_data) {
+  const RuntimeShape lhs5d = RuntimeShape::ExtendedShape(5, lhs_shape);
+  const RuntimeShape rhs5d = RuntimeShape::ExtendedShape(5, rhs_shape);
+  // Broadcast-resolved sizes of the three batch dimensions.
+  const int n0 = batch_matmul::broadcast_dim(lhs5d.Dims(0), rhs5d.Dims(0));
+  const int n1 = batch_matmul::broadcast_dim(lhs5d.Dims(1), rhs5d.Dims(1));
+  const int n2 = batch_matmul::broadcast_dim(lhs5d.Dims(2), rhs5d.Dims(2));
+  // Per-batch-dimension pointer steps (0 where that side is broadcast).
+  const int lhs_step0 = batch_matmul::extent(lhs5d, 0);
+  const int lhs_step1 = batch_matmul::extent(lhs5d, 1);
+  const int lhs_step2 = batch_matmul::extent(lhs5d, 2);
+  const int rhs_step0 = batch_matmul::extent(rhs5d, 0);
+  const int rhs_step1 = batch_matmul::extent(rhs5d, 1);
+  const int rhs_step2 = batch_matmul::extent(rhs5d, 2);
+  // Sizes of a single matrix product.
+  const int rows = lhs5d.Dims(3);
+  const int cols = rhs5d.Dims(4);
+  const int depth = lhs5d.Dims(4);
+  // Quantisation parameters, copied into locals of fixed width.
+  const int32_t rhs_value_offset = params.input_offset;
+  const int32_t lhs_value_offset = params.weights_offset;
+  const int32_t result_offset = params.output_offset;
+  const int32_t result_multiplier = params.output_multiplier;
+  const int result_shift = params.output_shift;
+  const int32_t clamp_min = params.quantized_activation_min;
+  const int32_t clamp_max = params.quantized_activation_max;
+  TFLITE_DCHECK_LE(clamp_min, clamp_max);
+  for (int b0 = 0; b0 < n0; ++b0) {
+    for (int b1 = 0; b1 < n1; ++b1) {
+      for (int b2 = 0; b2 < n2; ++b2) {
+        const T* lhs_mat =
+            lhs_data + b0 * lhs_step0 + b1 * lhs_step1 + b2 * lhs_step2;
+        const T* rhs_mat =
+            rhs_data + b0 * rhs_step0 + b1 * rhs_step1 + b2 * rhs_step2;
+        const int batch_index = (b0 * n1 * n2) + b1 * n2 + b2;
+        T* out_mat = output_data + batch_index * rows * cols;
+        for (int col = 0; col < cols; ++col) {
+          const T* rhs_vec = rhs_mat + depth * col;
+          for (int row = 0; row < rows; ++row) {
+            const T* lhs_vec = lhs_mat + depth * row;
+            AccumT acc = 0;
+            for (int k = 0; k < depth; ++k) {
+              const AccumT lhs_val = lhs_vec[k];
+              const AccumT rhs_val = rhs_vec[k];
+              acc += (lhs_val + lhs_value_offset) *
+                     (rhs_val + rhs_value_offset);
+            }
+            int32_t scaled = MultiplyByQuantizedMultiplier(
+                acc, result_multiplier, result_shift);
+            scaled += result_offset;
+            scaled = std::min(std::max(scaled, clamp_min), clamp_max);
+            out_mat[rows * col + row] = static_cast<T>(scaled);
+          }
+        }
+      }
+    }
+  }
+}
+}  // namespace reference_ops
+}  // namespace tflite_micro
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_MATMUL_H_

xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h ADDED Viewed

@@ -0,0 +1,101 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_
+#include <cmath>
+#include <cstring>
+#include "ruy/profiler/instrumentation.h"  // from @ruy
+#include "tensorflow/lite/kernels/internal/types.h"
+namespace tflite_micro {
+namespace reference_ops {
+// TODO(b/135760455): Move this method to an anonymous namespace in a cc file.
+//
+// Returns `shape` unchanged when it already has 4 dimensions. Otherwise
+// builds a 4-D shape via RuntimeShape(4, 1) and copies dims 0, 1 and 2 of
+// `shape` into dims 0, 1 and 3, leaving dim 2 at its constructed value.
+inline RuntimeShape ExtendShapeBatchToSpace(const RuntimeShape& shape) {
+  if (shape.DimensionsCount() != 4) {
+    RuntimeShape extended(4, 1);
+    extended.SetDim(0, shape.Dims(0));
+    extended.SetDim(1, shape.Dims(1));
+    extended.SetDim(3, shape.Dims(2));
+    return extended;
+  }
+  return shape;
+}
+// Moves input batch entries back into spatial positions of the output.
+//
+// unextended_input1_shape / input1_data: 3-D or 4-D input (debug-checked);
+//   3-D shapes are widened with ExtendShapeBatchToSpace.
+// block_shape_data: [0] is the block height; [1] is the block width and is
+//   only read for 4-D input (width is 1 otherwise).
+// crops_data: [0] is the top crop; [2] is the left crop and is only read for
+//   4-D input (0 otherwise).
+// unextended_output_shape / output_data: destination, same rank as the input.
+//   Only positions that map inside the output are written; each write copies
+//   one full run of `depth` elements.
+// unextended_input2_shape and unextended_input3_shape are not read.
+template <typename T>
+inline void BatchToSpaceND(const RuntimeShape& unextended_input1_shape,
+                           const T* input1_data,
+                           const RuntimeShape& unextended_input2_shape,
+                           const int32_t* block_shape_data,
+                           const RuntimeShape& unextended_input3_shape,
+                           const int32_t* crops_data,
+                           const RuntimeShape& unextended_output_shape,
+                           T* output_data) {
+  ruy::profiler::ScopeLabel label("BatchToSpaceND");
+  TFLITE_DCHECK_GE(unextended_input1_shape.DimensionsCount(), 3);
+  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(unextended_input1_shape.DimensionsCount(),
+                   unextended_output_shape.DimensionsCount());
+  const bool is_4d = unextended_input1_shape.DimensionsCount() == 4;
+  const RuntimeShape input1_shape =
+      ExtendShapeBatchToSpace(unextended_input1_shape);
+  const RuntimeShape output_shape =
+      ExtendShapeBatchToSpace(unextended_output_shape);
+  const int output_width = output_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_batch_size = output_shape.Dims(0);
+  const int depth = input1_shape.Dims(3);
+  const int input_width = input1_shape.Dims(2);
+  const int input_height = input1_shape.Dims(1);
+  const int input_batch_size = input1_shape.Dims(0);
+  const int block_shape_height = block_shape_data[0];
+  const int block_shape_width = is_4d ? block_shape_data[1] : 1;
+  const int crops_top = crops_data[0];
+  const int crops_left = is_4d ? crops_data[2] : 0;
+  // Every copy moves one complete depth run.
+  const size_t bytes_per_copy = depth * sizeof(T);
+  for (int in_batch = 0; in_batch < input_batch_size; ++in_batch) {
+    // The input batch index encodes both the output batch and the position
+    // inside the block.
+    const int out_batch = in_batch % output_batch_size;
+    const int spatial_offset = in_batch / output_batch_size;
+    for (int in_h = 0; in_h < input_height; ++in_h) {
+      const int out_h = in_h * block_shape_height +
+                        spatial_offset / block_shape_width - crops_top;
+      if (out_h < 0 || out_h >= output_height) {
+        continue;  // Row falls in the cropped region.
+      }
+      for (int in_w = 0; in_w < input_width; ++in_w) {
+        const int out_w = in_w * block_shape_width +
+                          spatial_offset % block_shape_width - crops_left;
+        if (out_w < 0 || out_w >= output_width) {
+          continue;  // Column falls in the cropped region.
+        }
+        T* out = output_data + Offset(output_shape, out_batch, out_h, out_w, 0);
+        const T* in =
+            input1_data + Offset(input1_shape, in_batch, in_h, in_w, 0);
+        std::memcpy(out, in, bytes_per_copy);
+      }
+    }
+  }
+}
+}  // namespace reference_ops
+}  // namespace tflite_micro
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BATCH_TO_SPACE_ND_H_

xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/binary_function.h ADDED Viewed

@@ -0,0 +1,91 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+namespace tflite_micro {
+namespace reference_ops {
+// Also appears to duplicate MinimumMaximum.
+//
+// Applies `func` element-wise to two inputs that are broadcast against each
+// other, for shapes of up to 4 dimensions (debug-checked).
+//
+// R: Result type. T1: Input 1 type. T2: Input 2 type.
+//
+// The output shape is extended to 4-D and written densely. Input offsets come
+// from the strides filled in by NdArrayDescsForElementwiseBroadcast.
+template <typename R, typename T1, typename T2>
+inline void BroadcastBinaryFunction4DSlow(
+    const RuntimeShape& unextended_input1_shape, const T1* input1_data,
+    const RuntimeShape& unextended_input2_shape, const T2* input2_data,
+    const RuntimeShape& unextended_output_shape, R* output_data,
+    R (*func)(T1, T2)) {
+  TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
+  const RuntimeShape output_shape =
+      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+  NdArrayDesc<4> desc1;
+  NdArrayDesc<4> desc2;
+  NdArrayDescsForElementwiseBroadcast(unextended_input1_shape,
+                                      unextended_input2_shape, &desc1, &desc2);
+  // Read the output dims through the shape accessor instead of casting the
+  // underlying buffer to `const int*`, which is only valid where the stored
+  // element type is exactly `int`.
+  const int out_dim0 = output_shape.Dims(0);
+  const int out_dim1 = output_shape.Dims(1);
+  const int out_dim2 = output_shape.Dims(2);
+  const int out_dim3 = output_shape.Dims(3);
+  for (int b = 0; b < out_dim0; ++b) {
+    const int out_idx_b = b * out_dim1;
+    const int in_idx1_b = desc1.strides[0] * b;
+    const int in_idx2_b = desc2.strides[0] * b;
+    for (int y = 0; y < out_dim1; ++y) {
+      const int out_idx_y = (out_idx_b + y) * out_dim2;
+      const int in_idx1_y = in_idx1_b + desc1.strides[1] * y;
+      const int in_idx2_y = in_idx2_b + desc2.strides[1] * y;
+      for (int x = 0; x < out_dim2; ++x) {
+        const int out_idx_x = (out_idx_y + x) * out_dim3;
+        int in1_idx = in_idx1_y + desc1.strides[2] * x;
+        int in2_idx = in_idx2_y + desc2.strides[2] * x;
+        for (int c = 0; c < out_dim3; ++c) {
+          output_data[out_idx_x + c] =
+              func(input1_data[in1_idx], input2_data[in2_idx]);
+          // Step along the innermost dimension by each input's own stride.
+          in1_idx += desc1.strides[3];
+          in2_idx += desc2.strides[3];
+        }
+      }
+    }
+  }
+}
+// Applies `func` element-wise to two inputs of identical flat size and stores
+// each result at the same flat index of the output. The element count comes
+// from MatchingFlatSize over the three shapes.
+//
+// R: Result type. T1: Input 1 type. T2: Input 2 type.
+template <typename R, typename T1, typename T2>
+inline void BinaryFunction(const RuntimeShape& input1_shape,
+                           const T1* input1_data,
+                           const RuntimeShape& input2_shape,
+                           const T2* input2_data,
+                           const RuntimeShape& output_shape, R* output_data,
+                           R (*func)(T1, T2)) {
+  const int element_count =
+      MatchingFlatSize(input1_shape, input2_shape, output_shape);
+  const T1* lhs_cursor = input1_data;
+  const T2* rhs_cursor = input2_data;
+  R* out_cursor = output_data;
+  for (int remaining = element_count; remaining > 0; --remaining) {
+    *out_cursor = func(*lhs_cursor, *rhs_cursor);
+    ++lhs_cursor;
+    ++rhs_cursor;
+    ++out_cursor;
+  }
+}
+}  // namespace reference_ops
+}  // namespace tflite_micro
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BINARY_FUNCTION_H_

xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/broadcast_args.h ADDED Viewed

@@ -0,0 +1,56 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+namespace tflite_micro {
+namespace reference_ops {
+// Computes the broadcast of two shape tensors, entry by entry from the back.
+//
+// input1_data / input2_data hold the entries of the two shapes; their lengths
+// are the flat sizes of input1_shape / input2_shape. A shape that runs out of
+// entries contributes 1. For each position the output receives the entry that
+// is not 1; if neither is 1 the two must be equal (TFLITE_CHECK_EQ).
+template <typename T>
+void BroadcastArgs(const RuntimeShape& input1_shape, const T* input1_data,
+                   const RuntimeShape& input2_shape, const T* input2_data,
+                   const RuntimeShape& output_shape, T* output_data) {
+  // Reads the entry `backward_idx` places from the end of a shape tensor;
+  // positions before the start of the tensor read as 1.
+  const auto dim_from_back = [](const RuntimeShape& shape, const T* data,
+                                int backward_idx) -> T {
+    const int forward_idx = shape.FlatSize() - 1 - backward_idx;
+    return forward_idx < 0 ? static_cast<T>(1) : data[forward_idx];
+  };
+  const int output_num_elements = output_shape.FlatSize();
+  for (int from_back = 0; from_back < output_num_elements; ++from_back) {
+    const int dim1 = dim_from_back(input1_shape, input1_data, from_back);
+    const int dim2 = dim_from_back(input2_shape, input2_data, from_back);
+    int broadcast_dim = dim1;
+    if (dim1 == 1) {
+      broadcast_dim = dim2;
+    } else if (dim2 != 1) {
+      TFLITE_CHECK_EQ(dim1, dim2);
+    }
+    output_data[output_num_elements - 1 - from_back] = broadcast_dim;
+  }
+}
+}  // namespace reference_ops
+}  // namespace tflite_micro
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_BROADCAST_ARGS_H_