xmos-ai-tools 1.3.2.dev80__py3-none-macosx_10_15_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xmos_ai_tools/__init__.py +7 -0
- xmos_ai_tools/io_server/__init__.py +151 -0
- xmos_ai_tools/runtime/__init__.py +0 -0
- xmos_ai_tools/runtime/buildfiles/aitoolslib.cmake +13 -0
- xmos_ai_tools/runtime/buildfiles/aitoolslib.make +8 -0
- xmos_ai_tools/runtime/include/flash_server.h +74 -0
- xmos_ai_tools/runtime/include/flatbuffers/allocator.h +68 -0
- xmos_ai_tools/runtime/include/flatbuffers/array.h +243 -0
- xmos_ai_tools/runtime/include/flatbuffers/base.h +474 -0
- xmos_ai_tools/runtime/include/flatbuffers/bfbs_generator.h +43 -0
- xmos_ai_tools/runtime/include/flatbuffers/buffer.h +142 -0
- xmos_ai_tools/runtime/include/flatbuffers/buffer_ref.h +53 -0
- xmos_ai_tools/runtime/include/flatbuffers/code_generators.h +235 -0
- xmos_ai_tools/runtime/include/flatbuffers/default_allocator.h +64 -0
- xmos_ai_tools/runtime/include/flatbuffers/detached_buffer.h +114 -0
- xmos_ai_tools/runtime/include/flatbuffers/flatbuffer_builder.h +1197 -0
- xmos_ai_tools/runtime/include/flatbuffers/flatbuffers.h +270 -0
- xmos_ai_tools/runtime/include/flatbuffers/flatc.h +111 -0
- xmos_ai_tools/runtime/include/flatbuffers/flexbuffers.h +1897 -0
- xmos_ai_tools/runtime/include/flatbuffers/grpc.h +300 -0
- xmos_ai_tools/runtime/include/flatbuffers/hash.h +127 -0
- xmos_ai_tools/runtime/include/flatbuffers/idl.h +1232 -0
- xmos_ai_tools/runtime/include/flatbuffers/minireflect.h +419 -0
- xmos_ai_tools/runtime/include/flatbuffers/pch/flatc_pch.h +39 -0
- xmos_ai_tools/runtime/include/flatbuffers/pch/pch.h +38 -0
- xmos_ai_tools/runtime/include/flatbuffers/reflection.h +502 -0
- xmos_ai_tools/runtime/include/flatbuffers/reflection_generated.h +1449 -0
- xmos_ai_tools/runtime/include/flatbuffers/registry.h +128 -0
- xmos_ai_tools/runtime/include/flatbuffers/stl_emulation.h +509 -0
- xmos_ai_tools/runtime/include/flatbuffers/string.h +64 -0
- xmos_ai_tools/runtime/include/flatbuffers/struct.h +53 -0
- xmos_ai_tools/runtime/include/flatbuffers/table.h +168 -0
- xmos_ai_tools/runtime/include/flatbuffers/util.h +690 -0
- xmos_ai_tools/runtime/include/flatbuffers/vector.h +370 -0
- xmos_ai_tools/runtime/include/flatbuffers/vector_downward.h +271 -0
- xmos_ai_tools/runtime/include/flatbuffers/verifier.h +283 -0
- xmos_ai_tools/runtime/include/ioserver.h +44 -0
- xmos_ai_tools/runtime/include/lib_nn/api/TransposeConv.h +24 -0
- xmos_ai_tools/runtime/include/lib_nn/api/add_int16.h +27 -0
- xmos_ai_tools/runtime/include/lib_nn/api/add_int16_transform.h +42 -0
- xmos_ai_tools/runtime/include/lib_nn/api/dequantize_int16.h +22 -0
- xmos_ai_tools/runtime/include/lib_nn/api/dequantize_int16_transform.h +34 -0
- xmos_ai_tools/runtime/include/lib_nn/api/expand_8_to_16.h +8 -0
- xmos_ai_tools/runtime/include/lib_nn/api/multiply_int16.h +42 -0
- xmos_ai_tools/runtime/include/lib_nn/api/multiply_int16_transform.h +71 -0
- xmos_ai_tools/runtime/include/lib_nn/api/nn_api.h +15 -0
- xmos_ai_tools/runtime/include/lib_nn/api/nn_bin_types.h +14 -0
- xmos_ai_tools/runtime/include/lib_nn/api/nn_config.h +287 -0
- xmos_ai_tools/runtime/include/lib_nn/api/nn_conv2d_structs.h +72 -0
- xmos_ai_tools/runtime/include/lib_nn/api/nn_image.h +26 -0
- xmos_ai_tools/runtime/include/lib_nn/api/nn_layers.h +303 -0
- xmos_ai_tools/runtime/include/lib_nn/api/nn_op_helper.h +132 -0
- xmos_ai_tools/runtime/include/lib_nn/api/nn_op_utils.h +150 -0
- xmos_ai_tools/runtime/include/lib_nn/api/nn_operator.h +18 -0
- xmos_ai_tools/runtime/include/lib_nn/api/nn_pooling.h +551 -0
- xmos_ai_tools/runtime/include/lib_nn/api/nn_types.h +83 -0
- xmos_ai_tools/runtime/include/lib_nn/api/nn_window_params.h +55 -0
- xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16.h +54 -0
- xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16_kernel_transform.h +37 -0
- xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16_mappings.h +13 -0
- xmos_ai_tools/runtime/include/lib_nn/api/quadratic_approximation.h +82 -0
- xmos_ai_tools/runtime/include/lib_nn/api/quadratic_interpolation.h +23 -0
- xmos_ai_tools/runtime/include/lib_nn/api/quantize_int16.h +22 -0
- xmos_ai_tools/runtime/include/lib_nn/api/quantize_int16_transform.h +33 -0
- xmos_ai_tools/runtime/include/lib_nn/api/version.h +13 -0
- xmos_ai_tools/runtime/include/lib_nn/api/vpu_memmove_word_aligned.h +15 -0
- xmos_ai_tools/runtime/include/lib_nn/api/vpu_memset_256.h +55 -0
- xmos_ai_tools/runtime/include/lib_nn/api/vpu_sim.h +118 -0
- xmos_ai_tools/runtime/include/lib_nn/api/xs3_vpu.h +216 -0
- xmos_ai_tools/runtime/include/lib_nn/api/xs3a_registers.h +2869 -0
- xmos_ai_tools/runtime/include/lib_nn/src/asm/asm_constants.h +41 -0
- xmos_ai_tools/runtime/include/lib_nn/src/asm/window_op_plan.h +25 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/api/fast_flash.h +47 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/api/inference_engine.h +218 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/api/memory_parallel_transport.h +52 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/api/version.h +13 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_config.h +17 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_device_memory.h +62 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_shared_config.h +31 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/conv2d_float.h +155 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_common.h +19 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_custom_options.h +28 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_error_reporter.h +32 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_interpreter.h +49 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_ops.h +71 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_profiler.h +49 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_utils.h +160 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/src/thread_call.h +119 -0
- xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_defs.h +4 -0
- xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_device.h +4 -0
- xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_std_descriptors.h +4 -0
- xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_std_requests.h +4 -0
- xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud.h +518 -0
- xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_conf_default.h +11 -0
- xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_device.h +87 -0
- xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_std_descriptors.h +191 -0
- xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_std_requests.h +120 -0
- xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/XUD_USB_Defines.h +70 -0
- xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/hid.h +23 -0
- xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudio10.h +30 -0
- xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudio20.h +357 -0
- xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudiocommon.h +168 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/delay_flexbuffers_generated_data.h +25 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/energy_flexbuffers_generated_data.h +28 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/fft_flexbuffers_generated_data.h +37 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_flexbuffers_generated_data.h +25 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_log_flexbuffers_generated_data.h +27 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_spectral_subtraction_flexbuffers_generated_data.h +26 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/framer_flexbuffers_generated_data.h +25 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/irfft.h +31 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/overlap_add_flexbuffers_generated_data.h +25 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/pcan_flexbuffers_generated_data.h +7 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/rfft.h +31 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/stacker_flexbuffers_generated_data.h +25 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/window_flexbuffers_generated_data.h +25 -0
- xmos_ai_tools/runtime/include/signal/src/circular_buffer.h +118 -0
- xmos_ai_tools/runtime/include/signal/src/complex.h +29 -0
- xmos_ai_tools/runtime/include/signal/src/energy.h +38 -0
- xmos_ai_tools/runtime/include/signal/src/fft_auto_scale.h +35 -0
- xmos_ai_tools/runtime/include/signal/src/filter_bank.h +69 -0
- xmos_ai_tools/runtime/include/signal/src/filter_bank_log.h +38 -0
- xmos_ai_tools/runtime/include/signal/src/filter_bank_spectral_subtraction.h +73 -0
- xmos_ai_tools/runtime/include/signal/src/filter_bank_square_root.h +34 -0
- xmos_ai_tools/runtime/include/signal/src/irfft.h +84 -0
- xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_common.h +49 -0
- xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_float.h +31 -0
- xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_int16.h +30 -0
- xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_int32.h +31 -0
- xmos_ai_tools/runtime/include/signal/src/log.h +30 -0
- xmos_ai_tools/runtime/include/signal/src/max_abs.h +31 -0
- xmos_ai_tools/runtime/include/signal/src/msb.h +32 -0
- xmos_ai_tools/runtime/include/signal/src/overlap_add.h +46 -0
- xmos_ai_tools/runtime/include/signal/src/pcan_argc_fixed.h +41 -0
- xmos_ai_tools/runtime/include/signal/src/rfft.h +85 -0
- xmos_ai_tools/runtime/include/signal/src/square_root.h +32 -0
- xmos_ai_tools/runtime/include/signal/src/window.h +31 -0
- xmos_ai_tools/runtime/include/signal/testdata/fft_test_data.h +48 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/array.h +156 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/builtin_op_data.h +22 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/builtin_ops.h +241 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/c/builtin_op_data.h +20 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/c/c_api_types.h +26 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/c/common.h +30 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/context_util.h +54 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/core/api/error_reporter.h +72 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/core/api/flatbuffer_conversions.h +440 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/core/api/tensor_utils.h +28 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/core/c/builtin_op_data.h +626 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/core/c/c_api_types.h +178 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/core/c/common.h +1496 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/core/macros.h +78 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/bits.h +102 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft.h +50 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft_io.h +34 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft_util.h +34 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank.h +63 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank_io.h +35 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h +50 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend.h +64 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend_io.h +31 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend_util.h +52 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h +48 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h +33 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_lut.h +40 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale.h +39 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale_io.h +33 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h +45 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h +46 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_io.h +36 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h +50 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h +47 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h +57 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window.h +49 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window_io.h +34 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window_util.h +45 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/common.h +1358 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/compatibility.h +122 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/cppmath.h +40 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/max.h +35 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/min.h +35 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/optimized/neon_check.h +20 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/portable_tensor.h +141 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/portable_tensor_utils.h +623 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/quantization_util.h +292 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/add.h +561 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/add_n.h +86 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/arg_min_max.h +88 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/batch_matmul.h +275 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h +101 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/binary_function.h +91 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/broadcast_args.h +56 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/broadcast_to.h +97 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/ceil.h +37 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/comparisons.h +271 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/concatenation.h +141 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/conv.h +289 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/cumsum.h +175 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depth_to_space.h +79 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h +100 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h +319 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/dequantize.h +78 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/div.h +247 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/elu.h +37 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/exp.h +38 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/fill.h +38 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor.h +39 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor_div.h +35 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor_mod.h +44 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/fully_connected.h +323 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/hard_swish.h +168 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/add.h +250 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h +241 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h +291 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h +126 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h +67 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h +121 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h +18 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h +194 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h +264 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h +117 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h +224 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/l2normalization.h +90 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/leaky_relu.h +69 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/log_softmax.h +256 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/logistic.h +132 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/lstm_cell.h +422 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/maximum_minimum.h +64 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/mul.h +267 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/neg.h +37 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/pad.h +169 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/pooling.h +303 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h +333 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h +244 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/prelu.h +111 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h +140 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/quantize.h +89 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/reduce.h +491 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/requantize.h +70 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/resize_bilinear.h +233 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h +102 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/round.h +51 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/select.h +151 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/slice.h +80 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/softmax.h +233 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h +109 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/space_to_depth.h +80 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/strided_slice.h +147 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/sub.h +465 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/tanh.h +129 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/transpose.h +203 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/transpose_conv.h +225 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/runtime_shape.h +168 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/strided_slice_logic.h +278 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/tensor_ctypes.h +42 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/types.h +1096 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/kernel_util.h +341 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/op_macros.h +49 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/padding.h +115 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h +100 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h +104 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h +58 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h +63 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h +144 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/benchmarks/micro_benchmark.h +95 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/compatibility.h +32 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/cortex_m_generic/debug_log_callback.h +49 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/debug_log.h +38 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/micro_model_settings.h +37 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/expected_output_data.h +47 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/input_data.h +108 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/network_model.h +166 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/detection_responder.h +32 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/image_provider.h +38 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/main_functions.h +37 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/model_settings.h +35 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/fake_micro_context.h +70 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/flatbuffer_utils.h +65 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/activation_utils.h +57 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/activations.h +64 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/add.h +78 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_function_specializations.h +141 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_interface.h +75 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h +56 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h +310 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h +145 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h +78 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/ceva_common.h +24 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/ceva_tflm_lib.h +613 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/mcps_macros.h +115 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/types.h +1286 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/circular_buffer.h +45 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/circular_buffer_flexbuffers_generated_data.h +22 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/conv.h +117 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/conv_test.h +94 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/depthwise_conv.h +80 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/dequantize.h +38 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/detection_postprocess_flexbuffers_generated_data.h +25 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ethosu.h +28 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/fully_connected.h +112 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/hard_swish.h +30 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/kernel_runner.h +86 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/kernel_util.h +150 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/leaky_relu.h +43 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/logical.h +35 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/logistic.h +42 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_eval.h +541 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_eval_test.h +817 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_shared.h +150 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/micro_ops.h +158 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/micro_tensor_utils.h +56 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/mul.h +74 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/pad.h +27 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/pooling.h +142 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/prelu.h +39 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/quantize.h +37 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/reduce.h +65 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/reshape.h +26 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/softmax.h +67 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/strided_slice.h +40 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/sub.h +60 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/svdf.h +100 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/testdata/conv_test_data.h +37 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h +579 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h +47 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/hifimini/fixedpoint_utils.h +139 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/lstm_eval.h +216 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/lstm_shared.h +78 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa.h +38 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_add.h +48 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_conv.h +89 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_depthwise_conv.h +74 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_fully_connected.h +78 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_pad.h +49 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_pooling.h +76 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_reduce.h +47 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_reshape.h +44 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_softmax.h +58 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_svdf.h +39 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_helpers.h +64 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h +170 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/linear_memory_planner.h +53 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/memory_plan_struct.h +73 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/micro_memory_planner.h +95 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h +133 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_allocation_info.h +138 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_allocator.h +351 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_arena_constants.h +28 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_common.h +38 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_context.h +176 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_graph.h +79 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter.h +189 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter_context.h +125 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter_graph.h +110 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_log.h +42 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_mutable_op_resolver.h +708 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_op_resolver.h +62 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_profiler.h +140 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_profiler_interface.h +38 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_resource_variable.h +89 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_time.h +36 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_utils.h +162 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/mock_micro_graph.h +60 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/interpreter/src/python_ops_resolver.h +21 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/tflite_size/src/flatbuffer_size.h +30 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/tflite_size/src/flatbuffer_size_wrapper.h +33 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/recording_micro_allocator.h +125 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/recording_micro_interpreter.h +69 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/system_setup.h +27 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/test_helper_custom_ops.h +49 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/test_helpers.h +334 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/testing/micro_test.h +267 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/testing/test_conv_model.h +23 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/tflite_bridge/flatbuffer_conversions_bridge.h +45 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/tflite_bridge/micro_error_reporter.h +36 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/log_utils.h +273 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/metrics.h +41 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/op_resolver.h +127 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/portable_type_to_tflitetype.h +75 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/schema/schema_generated.h +24644 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/schema/schema_utils.h +33 -0
- xmos_ai_tools/runtime/include/tile_ram_server.h +38 -0
- xmos_ai_tools/runtime/lib/libhost_xtflitemicro.a +0 -0
- xmos_ai_tools/runtime/lib/libxtflitemicro.a +0 -0
- xmos_ai_tools/xformer/__init__.py +60 -0
- xmos_ai_tools/xformer/flash.py +190 -0
- xmos_ai_tools/xinterpreters/__init__.py +1 -0
- xmos_ai_tools/xinterpreters/exceptions.py +38 -0
- xmos_ai_tools/xinterpreters/host_interpreter.py +652 -0
- xmos_ai_tools/xinterpreters/libs/macos/xtflm_python.1.0.1.dylib +0 -0
- xmos_ai_tools/xinterpreters/libs/macos/xtflm_python.dylib +0 -0
- xmos_ai_tools-1.3.2.dev80.data/data/bin/xcore-opt +0 -0
- xmos_ai_tools-1.3.2.dev80.dist-info/METADATA +33 -0
- xmos_ai_tools-1.3.2.dev80.dist-info/RECORD +395 -0
- xmos_ai_tools-1.3.2.dev80.dist-info/WHEEL +5 -0
- xmos_ai_tools-1.3.2.dev80.dist-info/top_level.txt +1 -0
xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/hifimini/fixedpoint_utils.h
ADDED
@@ -0,0 +1,139 @@
/* Copyright 2023 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_FIXEDPOINT_UTILS_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_FIXEDPOINT_UTILS_H_

#if defined(HIFIMINI)
#include <xtensa/tie/xt_hifi2.h>

#include <algorithm>
#include <cmath>
#include <cstdint>

#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/micro/kernels/xtensa/xtensa.h"

namespace tflite_micro {

// INT24 MIN/MAX
#define INT24_MIN -8388608
#define INT24_MAX 8388607

// Multiply 24bit value by a quantized multiplier (w/ shift) and returns a 48bit
// aligned value in the QR register.
inline ae_q56s MultiplyByQuantizedMultiplier(ae_p24x2s x_24x2,
                                             int32_t quantized_multiplier,
                                             int shift) {
  // A value with 1 sign bit, N integer bits and M fractional bits is
  // represented as QN+1.M since the sign bit is included in the integer bits.
  //
  // The Q notation in this method explains the values represented in each
  // variable, along with an implicit division since the quantized_multiplier
  // represents a value between 0.5 and 1.0 (Q1.X-1 where X is the bit precision
  // of the type).
  //
  // Load the quantized multiplier into the PR register.
  // NOTE: This method assumes that this param has been calculated for 24bit
  // space - not 32bits.
  // Q32.0 / 2^23 -> Q24.0 / 2^23 representing a Q1.23 multiplier.
  ae_p24x2s quantized_multiplier_24x2 = AE_MOVPA24(quantized_multiplier);
  // Shift right by 23 - 16 bits minus the specified shift. This is because we
  // keep 16 fractional bits until the end to perform rounding. Subtract shift
  // since shift is a left shift, and the 23-16 is a right shift.
  int shift_amount = 7 - shift;

  // Find the product of x and the quantized_multiplier.
  // Q24.0 / 2^23 * Q24.0 = Q48.0 / 2^23
  // Q48.0 / 2^23 >> 7 = Q48.0 / 2^16
  ae_q56s result_56 = AE_MULP24S_HH(x_24x2, quantized_multiplier_24x2);

  // Shift right if shift amount is positive, left if shift amount is negative.
  if (shift_amount >= 0) {
    result_56 = AE_Q56S_SRA(result_56, shift_amount);
  } else {
    result_56 = AE_Q56S_SLA(result_56, -shift_amount);
  }

  // Round off the bottom 16 bits.
  // Q48.0 / 2^16 -> Q32.0 aligned to 48 bits.
  result_56 = AE_ROUNDSQ32SYM(result_56);
  return result_56;
}

// Multiply 32bit value by a quantized multiplier (w/ shift) and returns a 48bit
// aligned value in the QR register.
inline ae_q56s MultiplyByQuantizedMultiplierResult48Bit(
    int32_t x, int32_t quantized_multiplier, int shift) {
  // Convert x into a 2x24bit PR register file. If x is outside the numerical
  // limits of a 24bit integer, the "fractional" or lower 8bits are discarded.
  // If x is within the range of a 24 bit integer, the "signed" or upper 8bits
  // are discarded.
  ae_p24x2s x_24x2;
  if (x > INT24_MIN && x < INT24_MAX) {
    x_24x2 = AE_MOVPA24(x);
  } else {
    x_24x2 = static_cast<ae_p24s>(*reinterpret_cast<ae_p24f*>(&x));
    shift += 8;
  }

  return MultiplyByQuantizedMultiplier(x_24x2, quantized_multiplier, shift);
}

// Calculate quantization params for 24bit runtimes.
inline void QuantizeMultiplierForInt24(float multiplier,
                                       int32_t* quantized_multiplier,
                                       int* shift) {
  if (multiplier == 0.0f) {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }

  // Special cased to 24bit:
  const float q = std::frexp(multiplier, shift);
  auto q_fixed = static_cast<int64_t>(std::round(q * (1 << 23)));

  TFLITE_CHECK(q_fixed <= (1 << 23));
  if (q_fixed == (1 << 23)) {
    q_fixed /= 2;
    ++*shift;
  }
  TFLITE_CHECK_LE(q_fixed, INT24_MAX);

  // Ensure shift does not exceed 24-bit range.
  TFLITE_CHECK_LE(*shift, 23);
  if (*shift < -23) {
    *shift = 0;
    q_fixed = 0;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

// Convert a floating point number to a Q representation for 24 bit integers.
inline int CreateQConstantForInt24(int integer_bits, float f) {
  const float min_bounds = static_cast<float>(INT24_MIN);
  const float max_bounds = static_cast<float>(INT24_MAX);

  int fractional_bits = 23 - integer_bits;
  float raw = std::round(f * static_cast<float>(1 << fractional_bits));
  raw = std::max(raw, min_bounds);
  raw = std::min(raw, max_bounds);
  return static_cast<int>(raw);
}

}  // namespace tflite_micro
#endif  // defined(HIFIMINI)
#endif  // TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_HIFIMINI_FIXEDPOINT_UTILS_H_
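The Q-format arithmetic in fixedpoint_utils.h is easier to follow with a concrete number. Below is a minimal host-side sketch (plain C++, no Xtensa intrinsics; the 0.00432 scale is a made-up example value, not taken from the package) of the frexp-based decomposition that QuantizeMultiplierForInt24 performs, plus a round-trip check that multiplier is approximately q_fixed * 2^(shift - 23).

// Illustrative sketch only: reproduces the Q1.23 decomposition on the host.
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  const float multiplier = 0.00432f;  // hypothetical requantization scale
  int shift = 0;
  // frexp(): multiplier == q * 2^shift with q in [0.5, 1).
  const float q = std::frexp(multiplier, &shift);
  // Scale the mantissa into a signed Q1.23 value, as the header does.
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1 << 23)));
  if (q_fixed == (1 << 23)) {  // rounding can push the mantissa to exactly 1.0
    q_fixed /= 2;
    ++shift;
  }
  // Round trip: multiplier should be close to q_fixed * 2^(shift - 23).
  const double reconstructed =
      static_cast<double>(q_fixed) * std::ldexp(1.0, shift - 23);
  std::printf("q_fixed=%lld shift=%d reconstructed=%.8f\n",
              static_cast<long long>(q_fixed), shift, reconstructed);
  return 0;
}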
xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/lstm_eval.h
ADDED
@@ -0,0 +1,216 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_LSTM_EVAL_H_
#define TENSORFLOW_LITE_KERNELS_LSTM_EVAL_H_

#include <cstdint>
#include <memory>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/portable_tensor_utils.h"
#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"

namespace tflite_micro {
namespace ops {
namespace micro {
namespace lstm_eval {

#if defined(HIFI5)
void calc_cell_state_without_cifg(int16_t* cell_state,
                                  const int16_t* forget_gate,
                                  const int16_t* cell_gate,
                                  const int16_t* input_gate, int shift1,
                                  int shift2, int clip, int num_elms);

void calc_cell_state_with_cifg(int16_t* cell_state, const int16_t* forget_gate,
                               const int16_t* cell_gate, int shift1, int shift2,
                               int clip, int num_elms);

void xa_nn_elm_mul_16x16_asym8s(int8_t* output, const int16_t* input_1,
                                const int16_t* input_2, int32_t multiplier,
                                int32_t shift, int32_t zero_point,
                                int num_elms);
#endif  // defined(HIFI5)

// Pamameters for integer LSTM.
// Consider split this into two Integer Parameters if more fields are added.
struct IntegerLstmParameter {
  int32_t effective_input_to_input_scale_a;
  int effective_input_to_input_scale_b;
  int32_t effective_recurrent_to_input_scale_a;
  int effective_recurrent_to_input_scale_b;
  int32_t effective_cell_to_input_scale_a;
  int effective_cell_to_input_scale_b;
  int32_t effective_input_to_forget_scale_a;
  int effective_input_to_forget_scale_b;
  int32_t effective_recurrent_to_forget_scale_a;
  int effective_recurrent_to_forget_scale_b;
  int32_t effective_cell_to_forget_scale_a;
  int effective_cell_to_forget_scale_b;
  int32_t effective_input_to_cell_scale_a;
  int effective_input_to_cell_scale_b;
  int32_t effective_recurrent_to_cell_scale_a;
  int effective_recurrent_to_cell_scale_b;
  int32_t effective_input_to_output_scale_a;
  int effective_input_to_output_scale_b;
  int32_t effective_recurrent_to_output_scale_a;
  int effective_recurrent_to_output_scale_b;
  int32_t effective_cell_to_output_scale_a;
  int effective_cell_to_output_scale_b;
  int32_t effective_proj_scale_a;
  int effective_proj_scale_b;
  int32_t effective_hidden_scale_a;
  int effective_hidden_scale_b;
  int32_t layer_norm_input_scale_a;
  int layer_norm_input_scale_b;
  int32_t layer_norm_forget_scale_a;
  int layer_norm_forget_scale_b;
  int32_t layer_norm_cell_scale_a;
  int layer_norm_cell_scale_b;
  int32_t layer_norm_output_scale_a;
  int layer_norm_output_scale_b;
  // Quantized clip value for cell and projection. Zero value means no clipping.
  int16_t quantized_cell_clip;
  int8_t quantized_proj_clip;
  int32_t hidden_zp;
  int32_t cell_scale;

  int32_t input_variance_guard;
  int32_t forget_variance_guard;
  int32_t cell_variance_guard;
  int32_t output_variance_guard;

  // Pre-calculate bias + zero_point * weight.
  // Unabled to use temporary tensors since those are used in Prepare() and
  // scratch buffer is only allocated after Preapre().
  std::unique_ptr<int32_t[]> input_to_forget_effective_bias;
  std::unique_ptr<int32_t[]> recurrent_to_forget_effective_bias;
  std::unique_ptr<int32_t[]> input_to_cell_effective_bias;
  std::unique_ptr<int32_t[]> recurrent_to_cell_effective_bias;
  std::unique_ptr<int32_t[]> input_to_output_effective_bias;
  std::unique_ptr<int32_t[]> recurrent_to_output_effective_bias;
  std::unique_ptr<int32_t[]> input_to_input_effective_bias;
  std::unique_ptr<int32_t[]> recurrent_to_input_effective_bias;
  std::unique_ptr<int32_t[]> projection_effective_bias;

  // Scale and zero point for intermediate tensors.
  // Used only in the 8x8_8 case.
  int32_t intermediate_scale_a[8];
  int32_t intermediate_scale_b[8];
  int32_t intermediate_zp[12];
};

TfLiteStatus EvalFloat(const TfLiteEvalTensor* input,
                       const TfLiteEvalTensor* input_to_input_weights,
                       const TfLiteEvalTensor* input_to_forget_weights,
                       const TfLiteEvalTensor* input_to_cell_weights,
                       const TfLiteEvalTensor* input_to_output_weights,
                       const TfLiteEvalTensor* recurrent_to_input_weights,
                       const TfLiteEvalTensor* recurrent_to_forget_weights,
                       const TfLiteEvalTensor* recurrent_to_cell_weights,
                       const TfLiteEvalTensor* recurrent_to_output_weights,
                       const TfLiteEvalTensor* cell_to_input_weights,
                       const TfLiteEvalTensor* cell_to_forget_weights,
                       const TfLiteEvalTensor* cell_to_output_weights,
                       const TfLiteEvalTensor* input_layer_norm_coefficients,
                       const TfLiteEvalTensor* forget_layer_norm_coefficients,
                       const TfLiteEvalTensor* cell_layer_norm_coefficients,
                       const TfLiteEvalTensor* output_layer_norm_coefficients,
                       const TfLiteEvalTensor* aux_input,
                       const TfLiteEvalTensor* aux_input_to_input_weights,
                       const TfLiteEvalTensor* aux_input_to_forget_weights,
                       const TfLiteEvalTensor* aux_input_to_cell_weights,
                       const TfLiteEvalTensor* aux_input_to_output_weights,
                       const TfLiteEvalTensor* input_gate_bias,
                       const TfLiteEvalTensor* forget_gate_bias,
                       const TfLiteEvalTensor* cell_gate_bias,
                       const TfLiteEvalTensor* output_gate_bias,
                       const TfLiteEvalTensor* projection_weights,
                       const TfLiteEvalTensor* projection_bias,
                       const TfLiteLSTMParams* params, bool forward_sequence,
                       bool time_major, int output_offset,
                       TfLiteEvalTensor* scratch_buffer,
                       TfLiteEvalTensor* output_state,
                       TfLiteEvalTensor* cell_state, TfLiteEvalTensor* output);

TfLiteStatus EvalInteger8x8_16(
    TfLiteContext* context, TfLiteNode* node, const TfLiteEvalTensor* input,
    const TfLiteEvalTensor* input_to_input_weights,
    const TfLiteEvalTensor* input_to_forget_weights,
    const TfLiteEvalTensor* input_to_cell_weights,
    const TfLiteEvalTensor* input_to_output_weights,
    const TfLiteEvalTensor* recurrent_to_input_weights,
    const TfLiteEvalTensor* recurrent_to_forget_weights,
    const TfLiteEvalTensor* recurrent_to_cell_weights,
    const TfLiteEvalTensor* recurrent_to_output_weights,
    const TfLiteEvalTensor* cell_to_input_weights,
    const TfLiteEvalTensor* cell_to_forget_weights,
    const TfLiteEvalTensor* cell_to_output_weights,
    const TfLiteEvalTensor* input_layer_norm_coefficients,
    const TfLiteEvalTensor* forget_layer_norm_coefficients,
    const TfLiteEvalTensor* cell_layer_norm_coefficients,
    const TfLiteEvalTensor* output_layer_norm_coefficients,
    const TfLiteEvalTensor* input_gate_bias,
    const TfLiteEvalTensor* forget_gate_bias,
    const TfLiteEvalTensor* cell_gate_bias,
    const TfLiteEvalTensor* output_gate_bias,
    const TfLiteEvalTensor* projection_weights,
    const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params,
    bool forward_sequence, bool time_major,
    const lstm_eval::IntegerLstmParameter* integer_lstm_param,
    TfLiteEvalTensor* output_state, TfLiteEvalTensor* cell_state,
    TfLiteEvalTensor* output, TfLiteEvalTensor* scratch0,
    TfLiteEvalTensor* scratch1, TfLiteEvalTensor* scratch2,
    TfLiteEvalTensor* scratch3, TfLiteEvalTensor* scratch4,
    TfLiteEvalTensor* scratch5);

TfLiteStatus EvalInteger8x8_8(
    const TfLiteEvalTensor* input,
    const TfLiteEvalTensor* input_to_input_weights,
    const TfLiteEvalTensor* input_to_forget_weights,
    const TfLiteEvalTensor* input_to_cell_weights,
    const TfLiteEvalTensor* input_to_output_weights,
    const TfLiteEvalTensor* recurrent_to_input_weights,
    const TfLiteEvalTensor* recurrent_to_forget_weights,
    const TfLiteEvalTensor* recurrent_to_cell_weights,
    const TfLiteEvalTensor* recurrent_to_output_weights,
    const TfLiteEvalTensor* cell_to_input_weights,
    const TfLiteEvalTensor* cell_to_forget_weights,
    const TfLiteEvalTensor* cell_to_output_weights,
    const TfLiteEvalTensor* input_layer_norm_coefficients,
    const TfLiteEvalTensor* forget_layer_norm_coefficients,
    const TfLiteEvalTensor* cell_layer_norm_coefficients,
    const TfLiteEvalTensor* output_layer_norm_coefficients,
    const TfLiteEvalTensor* input_gate_bias,
    const TfLiteEvalTensor* forget_gate_bias,
    const TfLiteEvalTensor* cell_gate_bias,
    const TfLiteEvalTensor* output_gate_bias,
    const TfLiteEvalTensor* projection_weights,
    const TfLiteEvalTensor* projection_bias, const TfLiteLSTMParams* params,
    TfLiteEvalTensor* output_state, TfLiteEvalTensor* cell_state,
    TfLiteEvalTensor* output,
    const lstm_eval::IntegerLstmParameter* integer_lstm_param,
    TfLiteEvalTensor* scratch0, TfLiteEvalTensor* scratch1,
    TfLiteEvalTensor* scratch2, TfLiteEvalTensor* scratch3,
    TfLiteEvalTensor* scratch4, TfLiteEvalTensor* scratch5,
    TfLiteEvalTensor* scratch6, TfLiteEvalTensor* scratch7);

}  // namespace lstm_eval
}  // namespace micro
}  // namespace ops
}  // namespace tflite_micro
#endif  // TENSORFLOW_LITE_KERNELS_LSTM_EVAL_H_
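The comment on IntegerLstmParameter notes that "bias + zero_point * weight" is precomputed because scratch memory only becomes available after Prepare(). A rough standalone sketch of that folding follows (plain C++, made-up shapes; the sign convention in the real kernels depends on how the activation offset is applied, so treat this as an illustration of the idea rather than the library's code).

// Illustrative only: fold an input zero point into a per-cell bias vector so
// the inner matmul can run on raw int8 data without per-element offsets.
#include <cstdint>
#include <vector>

std::vector<int32_t> FoldZeroPointIntoBias(const std::vector<int32_t>& bias,
                                           const std::vector<int8_t>& weights,
                                           int n_cell, int n_input,
                                           int32_t zero_point) {
  std::vector<int32_t> effective(n_cell, 0);
  for (int cell = 0; cell < n_cell; ++cell) {
    int32_t acc = bias.empty() ? 0 : bias[cell];
    for (int i = 0; i < n_input; ++i) {
      // Sum the zero-point contribution of every weight in this row once,
      // instead of subtracting the offset from every activation at runtime.
      acc += zero_point * static_cast<int32_t>(weights[cell * n_input + i]);
    }
    effective[cell] = acc;
  }
  return effective;
}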
xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/lstm_shared.h
ADDED
@@ -0,0 +1,78 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_LSTM_SHARED_H_
#define TENSORFLOW_LITE_KERNELS_LSTM_SHARED_H_

namespace tflite_micro {
namespace ops {
namespace micro {
namespace lstm {
// For full inputs kernel (24-inputs).
// Please note the 20-input full kernel is deprecated and only kept
// here for backward compatibility.
namespace full {

// Input Tensors of size {n_batch, n_input}
constexpr int kInputTensor = 0;

// Input weight tensors of size: {n_cell, n_input}
constexpr int kInputToInputWeightsTensor = 1;  // Optional
constexpr int kInputToForgetWeightsTensor = 2;
constexpr int kInputToCellWeightsTensor = 3;
constexpr int kInputToOutputWeightsTensor = 4;

// Recurrent weight tensors of size {n_cell, n_output}
constexpr int kRecurrentToInputWeightsTensor = 5;  // Optional
constexpr int kRecurrentToForgetWeightsTensor = 6;
constexpr int kRecurrentToCellWeightsTensor = 7;
constexpr int kRecurrentToOutputWeightsTensor = 8;

// Peephole weights tensors of size {n_cell}, representing a diagonal matrix.
constexpr int kCellToInputWeightsTensor = 9;    // Optional
constexpr int kCellToForgetWeightsTensor = 10;  // Optional
constexpr int kCellToOutputWeightsTensor = 11;  // Optional

// Gates bias tensors of size {n_cell}
constexpr int kInputGateBiasTensor = 12;  // Optional
constexpr int kForgetGateBiasTensor = 13;
constexpr int kCellGateBiasTensor = 14;
constexpr int kOutputGateBiasTensor = 15;

// Projection weight tensor of size {n_output, n_cell}
constexpr int kProjectionWeightsTensor = 16;  // Optional
// Projection bias tensor of size {n_output}
constexpr int kProjectionBiasTensor = 17;  // Optional

// These state tensors are defined as variable tensors, and will be modified by
// this op.
constexpr int kOutputStateTensor = 18;
constexpr int kCellStateTensor = 19;

// Layer norm coefficient tensors of size {n_cell}, representing a diagonal
// matrix.
constexpr int kInputLayerNormCoefficientsTensor = 20;   // Optional
constexpr int kForgetLayerNormCoefficientsTensor = 21;  // Optional
constexpr int kCellLayerNormCoefficientsTensor = 22;    // Optional
constexpr int kOutputLayerNormCoefficientsTensor = 23;  // Optional

// Output tensors.
constexpr int kOutputTensor = 0;
}  // namespace full

}  // namespace lstm
}  // namespace micro
}  // namespace ops
}  // namespace tflite_micro
#endif  // TENSORFLOW_LITE_KERNELS_LSTM_SHARED_H_
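The constants in lstm_shared.h are the contract between the flatbuffer model and the LSTM kernels: operands are fetched by these fixed indices rather than by hard-coded positions. A hedged usage sketch follows (namespace and helper names are taken from the headers bundled in this wheel, e.g. micro::GetEvalInput from kernel_util.h, but the function itself is illustrative and not part of the package).

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/kernel_util.h"
#include "tensorflow/lite/micro/kernels/xtensa/lstm_shared.h"

namespace tflite_micro {

// Illustrative helper: look up the core LSTM operands by their fixed indices.
TfLiteStatus FetchCoreLstmInputs(TfLiteContext* context, TfLiteNode* node) {
  namespace full = ops::micro::lstm::full;
  const TfLiteEvalTensor* input =
      micro::GetEvalInput(context, node, full::kInputTensor);
  const TfLiteEvalTensor* input_to_forget_weights =
      micro::GetEvalInput(context, node, full::kInputToForgetWeightsTensor);
  const TfLiteEvalTensor* forget_gate_bias =
      micro::GetEvalInput(context, node, full::kForgetGateBiasTensor);
  // Required operands must be present; optional ones (the "// Optional"
  // indices above) are checked by the real kernels before use.
  TF_LITE_ENSURE(context, input != nullptr);
  TF_LITE_ENSURE(context, input_to_forget_weights != nullptr);
  TF_LITE_ENSURE(context, forget_gate_bias != nullptr);
  return kTfLiteOk;
}

}  // namespace tflite_micro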
xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa.h
ADDED
@@ -0,0 +1,38 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_XTENSA_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_XTENSA_H_

#if defined(HIFIMINI)
#include <xtensa/tie/xt_hifi2.h>

#include "tensorflow/lite/micro/kernels/xtensa/hifimini/fixedpoint_utils.h"
#endif  // defined(HIFMINI)

#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
#include "include/nnlib/xa_nnlib_api.h"
#include "include/nnlib/xa_nnlib_standards.h"

#define ALIGNED_SIZE(x, bytes) (((x) + (bytes - 1)) & (~(bytes - 1)))
#define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))
#endif  // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)

#if defined(VISION_P6)
#include "utils.h"
#include "vision_api.h"
#endif  // defined(VISION_P6)

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_XTENSA_H_
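ALIGNED_SIZE and ALIGN_PTR in xtensa.h are plain round-up helpers for power-of-two alignments. A tiny standalone check (the macro is copied here only so the snippet compiles on its own):

// Illustrative check of the round-up arithmetic used by the HIFI targets.
#include <cstdio>

#define ALIGNED_SIZE(x, bytes) (((x) + (bytes - 1)) & (~(bytes - 1)))

int main() {
  // A 13-byte scratch request rounded up to the next 8-byte boundary -> 16.
  std::printf("%d\n", ALIGNED_SIZE(13, 8));  // prints 16
  // An already aligned size is left unchanged.
  std::printf("%d\n", ALIGNED_SIZE(16, 8));  // prints 16
  return 0;
}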
xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_add.h
ADDED
@@ -0,0 +1,48 @@
/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_XTENSA_ADD_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_XTENSA_ADD_H_

#include <cstdint>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/add.h"
namespace tflite_micro {

struct XtensaAddOpData {
  OpDataAdd reference_op_data;
#if defined(VISION_P6)
  uint8_t* p_context;  // persistent lib context for this instance saved here
  uint32_t context_size;
#endif  // VISION_P6
};

#if defined(VISION_P6)

TfLiteStatus AddPrepareVision(TfLiteContext* context, TfLiteNode* node);
TfLiteStatus AddEvalQuantizedVision(TfLiteContext* context, TfLiteNode* node,
                                    const TfLiteAddParams& params,
                                    const XtensaAddOpData& data,
                                    const TfLiteEvalTensor* input1,
                                    const TfLiteEvalTensor* input2,
                                    TfLiteEvalTensor* output);

#endif  // VISION_P6

}  // namespace tflite_micro

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_XTENSA_ADD_H_
xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_conv.h
ADDED
@@ -0,0 +1,89 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_XTENSA_CONV_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_XTENSA_CONV_H_

#include <cstdint>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/micro/kernels/conv.h"

namespace tflite_micro {
struct XtensaConvOpData {
  OpDataConv reference_op_data;

#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
  int scratch_tensor_index;
#endif  // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)

#if defined(VISION_P6)
  int8_t* reorder_coefficient_bias;  // buffers used to keep reordered coeff and
                                     // biases.
  uint32_t reorder_coefficient_bias_size;
  int8_t* per_channel_output_shift_int8;
  uint8_t* p_context;  // persistent lib context for this instance saved here
  uint32_t context_size;
  bool is_per_channel_quantized;
#endif  // VISION_P6
};

#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
TfLiteStatus ConvPrepareHifi(TfLiteContext* context, TfLiteNode* node);

TfLiteStatus ConvEvalHifiInt8(TfLiteContext* context, TfLiteNode* node,
                              const TfLiteConvParams& params,
                              const XtensaConvOpData& data,
                              const TfLiteEvalTensor* input,
                              const TfLiteEvalTensor* filter,
                              const TfLiteEvalTensor* bias,
                              TfLiteEvalTensor* output);

#if defined(HIFI3) || defined(HIFI4)
TfLiteStatus ConvEvalHifiInt16(TfLiteContext* context, TfLiteNode* node,
                               const TfLiteConvParams& params,
                               const XtensaConvOpData& data,
                               const TfLiteEvalTensor* input,
                               const TfLiteEvalTensor* filter,
                               const TfLiteEvalTensor* bias,
                               TfLiteEvalTensor* output);
#endif  // defined(HIFI3) || defined(HIFI4)

#endif  // defined(HIFI3) || defined(HIFI4) || defined(HIFI5)

#if defined(VISION_P6)

TfLiteStatus ConvPrepareVision(TfLiteContext* context, TfLiteNode* node);

TfLiteStatus ConvEvalVision(TfLiteContext* context, TfLiteNode* node,
                            const TfLiteConvParams& params,
                            const XtensaConvOpData& data,
                            const TfLiteEvalTensor* input,
                            const TfLiteEvalTensor* filter,
                            const TfLiteEvalTensor* bias,
                            TfLiteEvalTensor* output);

#endif  // VISION_P6

TfLiteStatus ConvReferenceEvalInt8(TfLiteContext* context, TfLiteNode* node);

TfLiteStatus ConvReferenceEvalInt16(TfLiteContext* context, TfLiteNode* node);

void* ConvInitXtensa(TfLiteContext* context, const char* buffer, size_t length);
TfLiteStatus ConvPrepareXtensa(TfLiteContext* context, TfLiteNode* node);

}  // namespace tflite_micro

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_XTENSA_CONV_H_
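Taken together, XtensaConvOpData and the Prepare/Eval declarations show the pattern these xtensa headers share: one op-data struct wraps the reference implementation's data plus per-accelerator fields, and compile-time macros select the code path. A hedged sketch of that dispatch (illustrative only; the real registration and dispatch live in the library's .cc sources, which are not part of this header listing):

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/micro/kernels/xtensa/xtensa_conv.h"

namespace tflite_micro {

// Illustrative dispatch only: route int8 conv to the HIFI kernel when one of
// the HIFI macros is defined, otherwise fall back to the reference eval.
TfLiteStatus ConvEvalInt8Dispatch(TfLiteContext* context, TfLiteNode* node,
                                  const TfLiteConvParams& params,
                                  const XtensaConvOpData& data,
                                  const TfLiteEvalTensor* input,
                                  const TfLiteEvalTensor* filter,
                                  const TfLiteEvalTensor* bias,
                                  TfLiteEvalTensor* output) {
#if defined(HIFI3) || defined(HIFI4) || defined(HIFI5)
  return ConvEvalHifiInt8(context, node, params, data, input, filter, bias,
                          output);
#else
  // The reference path only needs context and node; the remaining arguments
  // are unused on this branch.
  (void)params;
  (void)data;
  (void)input;
  (void)filter;
  (void)bias;
  (void)output;
  return ConvReferenceEvalInt8(context, node);
#endif
}

}  // namespace tflite_micro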