xmos-ai-tools 1.1.2.dev216__py3-none-macosx_11_0_arm64.whl → 1.1.2.dev236__py3-none-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xmos_ai_tools/runtime/include/lib_nn/api/nn_layers.h +16 -0
- xmos_ai_tools/runtime/include/lib_nn/api/quadratic_approximation.h +80 -0
- xmos_ai_tools/runtime/include/lib_nn/api/quadratic_interpolation.h +23 -0
- xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_ops.h +15 -15
- xmos_ai_tools/runtime/include/{tensorflow/lite/micro/examples/micro_speech/simple_features/model.h → signal/micro/kernels/delay_flexbuffers_generated_data.h} +7 -9
- xmos_ai_tools/runtime/include/signal/micro/kernels/energy_flexbuffers_generated_data.h +28 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/fft_flexbuffers_generated_data.h +37 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_flexbuffers_generated_data.h +25 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_log_flexbuffers_generated_data.h +27 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_spectral_subtraction_flexbuffers_generated_data.h +26 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/framer_flexbuffers_generated_data.h +25 -0
- xmos_ai_tools/runtime/include/{tensorflow/lite/micro/examples/micro_speech/simple_features/no_simple_features_data.h → signal/micro/kernels/irfft.h} +15 -7
- xmos_ai_tools/runtime/include/signal/micro/kernels/overlap_add_flexbuffers_generated_data.h +25 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/pcan_flexbuffers_generated_data.h +7 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/rfft.h +31 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/stacker_flexbuffers_generated_data.h +25 -0
- xmos_ai_tools/runtime/include/signal/micro/kernels/window_flexbuffers_generated_data.h +25 -0
- xmos_ai_tools/runtime/include/signal/src/circular_buffer.h +118 -0
- xmos_ai_tools/runtime/include/signal/src/complex.h +29 -0
- xmos_ai_tools/runtime/include/signal/src/energy.h +38 -0
- xmos_ai_tools/runtime/include/signal/src/fft_auto_scale.h +35 -0
- xmos_ai_tools/runtime/include/signal/src/filter_bank.h +69 -0
- xmos_ai_tools/runtime/include/signal/src/filter_bank_log.h +38 -0
- xmos_ai_tools/runtime/include/signal/src/filter_bank_spectral_subtraction.h +73 -0
- xmos_ai_tools/runtime/include/{tensorflow/lite/micro/examples/micro_speech/main_functions.h → signal/src/filter_bank_square_root.h} +14 -17
- xmos_ai_tools/runtime/include/signal/src/irfft.h +84 -0
- xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_common.h +49 -0
- xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_float.h +31 -0
- xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_int16.h +30 -0
- xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_int32.h +31 -0
- xmos_ai_tools/runtime/include/{tensorflow/lite/micro/examples/micro_speech/micro_features/no_micro_features_data.h → signal/src/log.h} +13 -6
- xmos_ai_tools/runtime/include/{tensorflow/lite/micro/python/interpreter/src/python_utils.h → signal/src/max_abs.h} +11 -11
- xmos_ai_tools/runtime/include/{tensorflow/lite/micro/examples/micro_speech/micro_features/yes_micro_features_data.h → signal/src/msb.h} +15 -6
- xmos_ai_tools/runtime/include/signal/src/overlap_add.h +46 -0
- xmos_ai_tools/runtime/include/signal/src/pcan_argc_fixed.h +41 -0
- xmos_ai_tools/runtime/include/signal/src/rfft.h +85 -0
- xmos_ai_tools/runtime/include/signal/src/square_root.h +32 -0
- xmos_ai_tools/runtime/include/{tensorflow/lite/micro/python/interpreter/src/numpy_utils.h → signal/src/window.h} +13 -15
- xmos_ai_tools/runtime/include/signal/testdata/fft_test_data.h +48 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/array.h +156 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/builtin_ops.h +44 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/c/c_api_types.h +6 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/c/common.h +8 -25
- xmos_ai_tools/runtime/include/tensorflow/lite/core/api/error_reporter.h +3 -3
- xmos_ai_tools/runtime/include/tensorflow/lite/core/api/flatbuffer_conversions.h +15 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/core/c/builtin_op_data.h +92 -3
- xmos_ai_tools/runtime/include/tensorflow/lite/core/c/c_api_types.h +61 -51
- xmos_ai_tools/runtime/include/tensorflow/lite/core/c/common.h +302 -1
- xmos_ai_tools/runtime/include/tensorflow/lite/core/macros.h +78 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/common.h +129 -43
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/cppmath.h +2 -2
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/portable_tensor.h +23 -4
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/add.h +210 -151
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/comparisons.h +9 -18
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/conv.h +2 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/add.h +103 -72
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h +2 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h +2 -63
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h +87 -26
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/mul.h +129 -80
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/reduce.h +42 -93
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/resize_bilinear.h +5 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/sub.h +249 -263
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/runtime_shape.h +11 -1
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/strided_slice_logic.h +5 -1
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/tensor_ctypes.h +5 -10
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/types.h +4 -2
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/kernel_util.h +25 -14
- xmos_ai_tools/runtime/include/tensorflow/lite/kernels/op_macros.h +14 -3
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/debug_log.h +10 -3
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/micro_model_settings.h +37 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/fake_micro_context.h +7 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/add.h +6 -5
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/circular_buffer.h +0 -3
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/conv.h +19 -20
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/conv_test.h +8 -31
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/depthwise_conv.h +8 -8
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ethosu.h +1 -1
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/fully_connected.h +9 -9
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/kernel_runner.h +14 -9
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/kernel_util.h +9 -4
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/micro_ops.h +119 -100
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/mul.h +4 -4
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/pooling.h +8 -8
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/reduce.h +4 -4
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/reshape.h +26 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/softmax.h +12 -16
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/strided_slice.h +40 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/svdf.h +8 -7
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h +5 -5
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa.h +2 -2
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_conv.h +26 -21
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_depthwise_conv.h +4 -4
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_reshape.h +2 -4
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_softmax.h +2 -2
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h +5 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/linear_memory_planner.h +4 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/micro_memory_planner.h +4 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h +4 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_allocator.h +23 -8
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_common.h +38 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_context.h +23 -65
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_graph.h +15 -57
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter.h +16 -5
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter_context.h +125 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter_graph.h +110 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_log.h +6 -8
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_mutable_op_resolver.h +114 -32
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_op_resolver.h +6 -5
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_profiler.h +1 -1
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/mock_micro_graph.h +1 -1
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/interpreter/src/python_ops_resolver.h +21 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/test_helper_custom_ops.h +3 -4
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/test_helpers.h +28 -12
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/testing/micro_test.h +1 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/log_utils.h +273 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/metrics.h +41 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/op_resolver.h +127 -0
- xmos_ai_tools/runtime/include/tensorflow/lite/schema/schema_generated.h +9139 -5010
- xmos_ai_tools/runtime/lib/libhost_xtflitemicro.a +0 -0
- xmos_ai_tools/runtime/lib/libxtflitemicro.a +0 -0
- xmos_ai_tools/xinterpreters/libs/macos/xtflm_python.1.0.1.dylib +0 -0
- xmos_ai_tools/xinterpreters/libs/macos/xtflm_python.dylib +0 -0
- {xmos_ai_tools-1.1.2.dev216.data → xmos_ai_tools-1.1.2.dev236.data}/data/bin/xcore-opt +0 -0
- {xmos_ai_tools-1.1.2.dev216.dist-info → xmos_ai_tools-1.1.2.dev236.dist-info}/METADATA +3 -4
- {xmos_ai_tools-1.1.2.dev216.dist-info → xmos_ai_tools-1.1.2.dev236.dist-info}/RECORD +128 -105
- {xmos_ai_tools-1.1.2.dev216.dist-info → xmos_ai_tools-1.1.2.dev236.dist-info}/WHEEL +1 -1
- xmos_ai_tools/runtime/include/tensorflow/lite/core/api/op_resolver.h +0 -129
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/all_ops_resolver.h +0 -38
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/audio_provider.h +0 -44
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/command_responder.h +0 -30
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/feature_provider.h +0 -50
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/micro_features/micro_features_generator.h +0 -30
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/micro_features/micro_model_settings.h +0 -43
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/micro_features/no_feature_data_slice.h +0 -29
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/micro_features/yes_feature_data_slice.h +0 -29
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/recognize_commands.h +0 -151
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/simple_features/no_power_spectrum_data.h +0 -29
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/simple_features/simple_features_generator.h +0 -29
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/simple_features/simple_model_settings.h +0 -43
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/simple_features/yes_power_spectrum_data.h +0 -29
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/simple_features/yes_simple_features_data.h +0 -23
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_string.h +0 -33
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/interpreter/src/interpreter_wrapper.h +0 -51
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/interpreter/src/pybind11_lib.h +0 -64
- xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/interpreter/src/shared_library.h +0 -40
- {xmos_ai_tools-1.1.2.dev216.dist-info → xmos_ai_tools-1.1.2.dev236.dist-info}/top_level.txt +0 -0
tensorflow/lite/core/c/common.h:

@@ -38,6 +38,7 @@ limitations under the License.
 /// "third_party/tensorflow/lite/c/common.h".
 /// Only the TensorFlow Lite implementation itself should include this
 /// file directly.
+// IWYU pragma: private, include "third_party/tensorflow/lite/c/common.h"
 
 #ifndef TENSORFLOW_LITE_CORE_C_COMMON_H_
 #define TENSORFLOW_LITE_CORE_C_COMMON_H_
@@ -157,6 +158,10 @@ int TfLiteFloatArrayGetSizeInBytes(int size);
 // This returns a pointer, that you must free using TfLiteFloatArrayFree().
 TfLiteFloatArray* TfLiteFloatArrayCreate(int size);
 
+// Create a copy of an array passed as `src`.
+// You are expected to free memory with TfLiteFloatArrayFree.
+TfLiteFloatArray* TfLiteFloatArrayCopy(const TfLiteFloatArray* src);
+
 // Free memory of array `a`.
 void TfLiteFloatArrayFree(TfLiteFloatArray* a);
 #endif  // TF_LITE_STATIC_MEMORY
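The new `TfLiteFloatArrayCopy` follows the same ownership convention as the existing create/free pair; note that, like them, it sits inside `#ifndef TF_LITE_STATIC_MEMORY`, so it is compiled out of static-memory micro builds. A minimal hedged sketch of the intended usage (the wrapper function is illustrative, not part of this diff):

```cpp
#include "tensorflow/lite/core/c/common.h"

void DemoFloatArrayCopy() {
  TfLiteFloatArray* scales = TfLiteFloatArrayCreate(4);
  for (int i = 0; i < 4; ++i) scales->data[i] = 0.5f * i;
  // Deep copy; the caller owns the result.
  TfLiteFloatArray* copy = TfLiteFloatArrayCopy(scales);
  TfLiteFloatArrayFree(scales);  // original and copy are freed independently
  TfLiteFloatArrayFree(copy);
}
```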
@@ -345,6 +350,8 @@ typedef union TfLitePtrUnion {
 //        as constant inputs for downstream ops (also in prepare).
 //  * kTfLiteCustom: Custom memory allocation provided by the user. See
 //    TfLiteCustomAllocation below.
+//  * kTfLiteVariantObject: Allocation is an arbitrary type-erased C++ object.
+//    Allocation and deallocation are done through `new` and `delete`.
 typedef enum TfLiteAllocationType {
   kTfLiteMemNone = 0,
   kTfLiteMmapRo,
@@ -353,8 +360,40 @@ typedef enum TfLiteAllocationType {
   kTfLiteDynamic,
   kTfLitePersistentRo,
   kTfLiteCustom,
+  kTfLiteVariantObject,
 } TfLiteAllocationType;
 
+// Memory allocation strategies.
+//
+// TfLiteAllocationType values have been overloaded to mean more than their
+// original intent. This enum should only be used to document the allocation
+// strategy used by a tensor for its data.
+typedef enum TfLiteAllocationStrategy {
+  kTfLiteAllocationStrategyUnknown,
+  kTfLiteAllocationStrategyNone,    // No data is allocated.
+  kTfLiteAllocationStrategyMMap,    // Data is mmaped.
+  kTfLiteAllocationStrategyArena,   // Handled by the arena.
+  kTfLiteAllocationStrategyMalloc,  // Uses `malloc`/`free`.
+  kTfLiteAllocationStrategyNew      // Uses `new[]`/`delete[]`.
+} TfLiteAllocationStrategy;
+
+// Describes how stable a tensor attribute is across interpreter runs.
+typedef enum TfLiteRunStability {
+  kTfLiteRunStabilityUnknown,
+  kTfLiteRunStabilityUnstable,   // May change at any time.
+  kTfLiteRunStabilitySingleRun,  // Will stay the same for one run.
+  kTfLiteRunStabilityAcrossRuns  // Will stay the same across all runs.
+} TfLiteRunStability;
+
+// Describes the steps of a TFLite operation life cycle.
+typedef enum TfLiteRunStep {
+  kTfLiteRunStepUnknown,
+  kTfLiteRunStepInit,
+  kTfLiteRunStepPrepare,
+  kTfLiteRunStepEval
+} TfLiteRunStep;
+
 // The delegates should use zero or positive integers to represent handles.
 // -1 is reserved from unallocated status.
 typedef int TfLiteBufferHandle;
@@ -847,7 +886,7 @@ typedef struct TfLiteContext {
   //   }
   //
   // NOTE: The context owns the memory referenced by partition_params_array. It
-  // will be cleared with another call to PreviewDelegateParitioning, or after
+  // will be cleared with another call to PreviewDelegatePartitioning, or after
   // TfLiteDelegateParams::Prepare returns.
   //
   // WARNING: This is an experimental interface that is subject to change.
@@ -878,6 +917,27 @@ typedef struct TfLiteContext {
   TfLiteStatus (*GetModelMetadata)(const struct TfLiteContext* context,
                                    const char* name, const char** ptr,
                                    size_t* bytes);
+
+  // Retrieves the corresponding TfLiteContext of a subgraph that the given
+  // subgraph_index points to and switches to the delegate context for that
+  // subgraph. If an invalid subgraph index is given, returns kTfLiteError.
+  // NOTE: This function is expected to be paired with ReleaseSubgraphContext()
+  // once the delegate preparation is done and/or the delegate context
+  // functions are no longer needed.
+  //
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteStatus (*AcquireSubgraphContext)(
+      struct TfLiteContext* context, int subgraph_index,
+      struct TfLiteContext** acquired_context);
+  // Releases the subgraph context by switching back to the TFLite kernel
+  // context for the subgraph that the given subgraph_index points to.
+  // NOTE: This function is expected to be used after AcquireSubgraphContext()
+  // once the delegate preparation is done and/or the delegate context
+  // functions are no longer needed.
+  //
+  // WARNING: This is an experimental interface that is subject to change.
+  TfLiteStatus (*ReleaseSubgraphContext)(struct TfLiteContext* context,
+                                         int subgraph_index);
 } TfLiteContext;
 
 // `TfLiteRegistrationExternal` is an external version of `TfLiteRegistration`
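The two new context hooks are meant to be used as a bracket around delegate work on another subgraph. A hedged sketch of how a delegate's preparation step might pair them (the `PrepareSubgraph` wrapper is illustrative, not from this diff; the signatures match the declarations above):

```cpp
#include "tensorflow/lite/core/c/common.h"

// Hedged sketch: pairing AcquireSubgraphContext with ReleaseSubgraphContext.
// `context` is the TfLiteContext handed to the delegate; `subgraph_index`
// is assumed to identify a subgraph delegated to us.
TfLiteStatus PrepareSubgraph(TfLiteContext* context, int subgraph_index) {
  TfLiteContext* subgraph_context = nullptr;
  TfLiteStatus status = context->AcquireSubgraphContext(
      context, subgraph_index, &subgraph_context);
  if (status != kTfLiteOk) return status;  // invalid index -> kTfLiteError
  // ... inspect or prepare tensors through subgraph_context ...
  // Switch back to the TFLite kernel context once done.
  return context->ReleaseSubgraphContext(context, subgraph_index);
}
```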
@@ -886,6 +946,64 @@ typedef struct TfLiteContext {
 // field is exactly the same as with `TfLiteRegistration`.
 typedef struct TfLiteRegistrationExternal TfLiteRegistrationExternal;
 
+// The valid values of the `inplace_operator` field in `TfLiteRegistration`.
+// This allows an op to signal to the runtime that the same data pointer
+// may be passed as an input and output without impacting the result.
+// This does not mean that the memory can safely be reused; it is up to the
+// runtime to determine this, e.g. whether another op consumes the same input
+// or whether an input tensor has sufficient memory allocated to store the
+// output data.
+//
+// Setting these flags authorizes the runtime to set the data pointers of an
+// input and output tensor to the same value. In such cases, the memory
+// required by the output must be less than or equal to that required by the
+// shared input, never greater. If kTfLiteInplaceOpDataUnmodified is set, then
+// the runtime can share the same input tensor with multiple operators'
+// outputs, provided that kTfLiteInplaceOpDataUnmodified is set for all of
+// them. Otherwise, if an input tensor is consumed by multiple operators, it
+// may only be shared with the operator which is the last to consume it.
+//
+// Note that this is a bitmask, so the values should be 1, 2, 4, 8, ...etc.
+typedef enum {
+  // The default value. This indicates that the same data pointer cannot
+  // safely be passed as an op's input and output.
+  kTfLiteInplaceOpNone = 0,
+  // This indicates that an op's first output's data is identical to its
+  // first input's data, for example Reshape.
+  kTfLiteInplaceOpDataUnmodified = 1,
+  // Setting kTfLiteInplaceInputCanBeSharedWithCorrespondingOutput means
+  // that InputN may be shared with OutputN instead of with the first output.
+  // This flag requires one or more of kTfLiteInplaceOpInputNShared to be set.
+  kTfLiteInplaceInputCanBeSharedWithCorrespondingOutput = 2,
+  // kTfLiteInplaceOpInputNShared indicates that it is safe for an op to share
+  // InputN's data pointer with an output tensor. If
+  // kTfLiteInplaceInputCanBeSharedWithCorrespondingOutput is set then
+  // kTfLiteInplaceOpInputNShared indicates that InputN may be shared
+  // with OutputN, otherwise kTfLiteInplaceOpInputNShared indicates that
+  // InputN may be shared with the first output.
+  //
+  // Indicates that an op's first input may be shared with the first output
+  // tensor. kTfLiteInplaceInputCanBeSharedWithCorrespondingOutput has
+  // no impact on the behavior allowed by this flag.
+  kTfLiteInplaceOpInput0Shared = 4,
+  // Indicates that an op's second input may be shared with the first output
+  // if kTfLiteInplaceInputCanBeSharedWithCorrespondingOutput is not set,
+  // or with the second output if
+  // kTfLiteInplaceInputCanBeSharedWithCorrespondingOutput is set.
+  kTfLiteInplaceOpInput1Shared = 8,
+  // Indicates that an op's third input may be shared with the first output
+  // if kTfLiteInplaceInputCanBeSharedWithCorrespondingOutput is not set,
+  // or with the third output if
+  // kTfLiteInplaceInputCanBeSharedWithCorrespondingOutput is set.
+  kTfLiteInplaceOpInput2Shared = 16,
+  // Placeholder to ensure that enum can hold 64 bit values to accommodate
+  // future fields.
+  kTfLiteInplaceOpMaxValue = UINT64_MAX,
+} TfLiteInPlaceOp;
+
+// The number of shareable inputs supported.
+static const int kTfLiteMaxSharableOpInputs = 3;
+
 typedef struct TfLiteRegistration {
   // Initializes the op from serialized data.
   // Called only *once* for the lifetime of the op, so any one-time allocations
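To make the bitmask concrete, here is a hedged sketch of how a kernel registration could advertise in-place behavior. The `Reshape*` hook names are hypothetical, not from this diff; only the enum values and the `inplace_operator` field come from the header:

```cpp
#include "tensorflow/lite/core/c/common.h"

// Hypothetical kernel hooks, declared only to make the sketch self-contained.
extern void* ReshapeInit(TfLiteContext* context, const char* buffer,
                         size_t length);
extern TfLiteStatus ReshapePrepare(TfLiteContext* context, TfLiteNode* node);
extern TfLiteStatus ReshapeEval(TfLiteContext* context, TfLiteNode* node);

TfLiteRegistration MakeReshapeRegistration() {
  TfLiteRegistration reg = {};
  reg.init = ReshapeInit;
  reg.prepare = ReshapePrepare;
  reg.invoke = ReshapeEval;
  // The first output aliases the first input unchanged (Reshape-like), so
  // the runtime may point both at the same buffer.
  reg.inplace_operator = kTfLiteInplaceOpDataUnmodified;
  return reg;
}

// An elementwise op that may overwrite InputN with OutputN would instead
// combine flags, e.g.:
//   kTfLiteInplaceInputCanBeSharedWithCorrespondingOutput |
//   kTfLiteInplaceOpInput0Shared | kTfLiteInplaceOpInput1Shared
```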
@@ -966,8 +1084,37 @@ typedef struct TfLiteRegistration {
   // does not support asynchronous execution for this `node`.
   struct TfLiteAsyncKernel* (*async_kernel)(TfLiteContext* context,
                                             TfLiteNode* node);
+
+  // Indicates if an operator's output may safely overwrite its inputs.
+  // See the comments in `TfLiteInPlaceOp`.
+  uint64_t inplace_operator;
 } TfLiteRegistration;
 
+/// \private
+// Old version of `TfLiteRegistration` to maintain binary backward
+// compatibility.
+// The legacy registration type must be a POD struct type whose field types
+// must be a prefix of the field types in TfLiteRegistration, and offset of
+// the first field in TfLiteRegistration that is not present in the legacy
+// registration type must be greater than or equal to the size of the legacy
+// registration type.
+// WARNING: This structure is deprecated / not an official part of the
+// API. It should be only used for binary backward compatibility.
+typedef struct TfLiteRegistration_V3 {
+  void* (*init)(TfLiteContext* context, const char* buffer, size_t length);
+  void (*free)(TfLiteContext* context, void* buffer);
+  TfLiteStatus (*prepare)(TfLiteContext* context, TfLiteNode* node);
+  TfLiteStatus (*invoke)(TfLiteContext* context, TfLiteNode* node);
+  const char* (*profiling_string)(const TfLiteContext* context,
+                                  const TfLiteNode* node);
+  int32_t builtin_code;
+  const char* custom_name;
+  int version;
+  TfLiteRegistrationExternal* registration_external;
+  struct TfLiteAsyncKernel* (*async_kernel)(TfLiteContext* context,
+                                            TfLiteNode* node);
+} TfLiteRegistration_V3;
+
 /// \private
 // Old version of `TfLiteRegistration` to maintain binary backward
 // compatibility.
@@ -1158,6 +1305,7 @@ typedef struct TfLiteOpaqueDelegateBuilder {
   int64_t flags;
 } TfLiteOpaqueDelegateBuilder;
 
+#ifndef TF_LITE_STATIC_MEMORY
 // Creates an opaque delegate and returns its address. The opaque delegate will
 // behave according to the provided 'opaque_delegate_builder'. The lifetime of
 // the objects pointed to by any of the fields within the
@@ -1174,6 +1322,7 @@ TfLiteOpaqueDelegate* TfLiteOpaqueDelegateCreate(
 // Deletes the provided opaque 'delegate'. This function has no effect if the
 // 'delegate' is a null pointer.
 void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* delegate);
+#endif  // TF_LITE_STATIC_MEMORY
 
 // Returns a pointer to the data associated with the provided opaque 'delegate'.
 //
@@ -1189,7 +1338,159 @@ void TfLiteOpaqueDelegateDelete(TfLiteOpaqueDelegate* delegate);
 // 'opaque_delegate_builder' field is null.
 void* TfLiteOpaqueDelegateGetData(const TfLiteOpaqueDelegate* delegate);
 
+// Returns a tensor's data allocation strategy.
+TfLiteAllocationStrategy TfLiteTensorGetAllocationStrategy(
+    const TfLiteTensor* t);
+
+// Returns how stable a tensor's data buffer address is across runs.
+TfLiteRunStability TfLiteTensorGetBufferAddressStability(const TfLiteTensor* t);
+
+// Returns how stable a tensor's data values are across runs.
+TfLiteRunStability TfLiteTensorGetDataStability(const TfLiteTensor* t);
+
+// Returns the operation step when the data of a tensor is populated.
+//
+// Some operations can precompute their results before the evaluation step.
+// This makes the data available earlier for subsequent operations.
+TfLiteRunStep TfLiteTensorGetDataKnownStep(const TfLiteTensor* t);
+
+// Returns the operation step when the shape of a tensor is computed.
+//
+// Some operations can precompute the shape of their results before the
+// evaluation step. This makes the shape available earlier for subsequent
+// operations.
+TfLiteRunStep TfLiteTensorGetShapeKnownStep(const TfLiteTensor* t);
+
 #ifdef __cplusplus
 }  // extern "C"
+
+#include <utility>
+
+// --- TFLITE VARIANT TENSORS ----
+// Programming languages usually define "variant" as a type that can hold an
+// unbounded set of types. See std::any
+// (https://en.cppreference.com/w/cpp/utility/any) for a related standard
+// library construct. In tensorflow, variant tensors have a data member which
+// is an Object that is destructible and copy constructible.
+// Variant tensors are commonly used to represent non trivial data
+// semantics that don't fit into simple primitives, such as lists of tensors
+// and datasets. Additionally, they can facilitate containers for optimizing
+// memory movement of tensor data.
+//
+// The following set of classes define the variant tensor member for tflite.
+// They implement a type-erased container intended to be used behind the
+// `data.data : void*` member of `TfLiteTensor`s. Runtime functions interact
+// with the variant member at the level of a `VariantData`, whereas kernels
+// operate with full knowledge of the un-erased type. The `VariantData`
+// class provides abstract methods for destroying and copying `VariantData`.
+// Invoking these methods will dispatch to the erased type opaquely.
+// The contents of any object of a type derived from `AbstractVariant` can be
+// written to `TfLiteTensor::data::data : void*` from kernels. If the runtime
+// were to copy such a tensor through `TfLiteTensorCopy`, the destination data
+// member will contain the result of invoking the erased type's copy
+// constructor. Similarly, when the runtime releases tensors from memory, the
+// erased type's destructor will be invoked. There are a few caveats to
+// consider in order to use these safely, which we discuss below.
+//
+// EXAMPLE: READING VARIANT TENSORS
+// ```
+// // retrieve input with `type == kTfLiteVariant`
+// TfLiteTensor* input = ...
+// // must first static cast to `VariantData`, more on this below.
+// VariantData* vd_input = static_cast<VariantData*>(t->data.data);
+// CustomType* typed_input =
+//     static_cast<CustomType*>(vd_input);
+// // do custom work on `typed_input`...
+// ```
+//
+// EXAMPLE: WRITING VARIANT TENSORS
+// ```
+// TfLiteTensor* output = ...
+// // construct a new variant object behind the target tensor
+// TfLiteVariantRealloc<DerivedType, DerivedArgs...>(output, args...);
+// // again must static cast to `VariantData*` before writing to `void*`.
+// output->data.data = static_cast<VariantData*>(typed_output);
+// ```
+//
+// WHY STATIC CAST TO `VariantData*`
+// The Standard defines a `reinterpret_cast` from a derived type to its
+// parents as undefined behavior when the parent is a non-standard layout.
+// https://en.cppreference.com/w/cpp/language/reinterpret_cast (see bullet 5).
+// Due to the `VariantData` having virtual members it is indeed non-standard
+// layout, and any type derived from `VariantData` fails to be
+// "transparently-replaceable". I.e. implicit cast from derived to base in
+// this case may adjust the pointer, and by definition `reinterpret_cast`
+// will not adjust the pointer.
+// Thus, dereferencing a pointer of type `VariantData` which addresses
+// the first byte of an object of said derived type is UB unless it was first
+// implicitly or statically casted to a `VariantData`. Writing the object of
+// derived type directly to `void*` which is dereferenced as a `VariantData`
+// is then UB, and so the intermediate cast through `VariantData` must be
+// enforced.
+// A good example of this issue is elucidated in the bottom code snippet
+// here: https://en.cppreference.com/w/cpp/utility/launder.
+class VariantData {
+ public:
+  // All variant objects must be able to be destroyed and copied.
+  virtual ~VariantData() = default;
+  // A "virtual copy-constructor". Often the destination tensor of a variant
+  // copy may have been previously allocated in a prior call to inference. We
+  // allow the copy to target the destination's buffer (`maybe_alloc`),
+  // for potential reuse and optimizations. `maybe_alloc` must be of the same
+  // underlying derived type. References to whatever object is at
+  // `maybe_alloc` may be invalidated.
+  virtual VariantData* CloneTo(VariantData* maybe_alloc) const = 0;
+};
+
+// Concrete implementations extend `AbstractVariantData` with CRTP.
+template <typename ErasedDerived>
+class AbstractVariantData : public VariantData {
+ public:
+  VariantData* CloneTo(VariantData* maybe_alloc) const override {
+    if (maybe_alloc != nullptr) {
+      // If the output is still allocated, then its object may still be
+      // in its lifetime and the destructor must be called before re-using
+      // the buffer.
+      // This may actually have a non-negligible effect on performance if the
+      // destructor is complex. A future iteration may
+      // introduce copy or move assignment semantics, allowing for the
+      // underlying implementation to optimize for this case.
+      auto* derived = static_cast<ErasedDerived*>(maybe_alloc);
+      derived->~ErasedDerived();
+      return new (derived)
+          ErasedDerived(static_cast<ErasedDerived const&>(*this));
+    }
+    return new ErasedDerived(static_cast<ErasedDerived const&>(*this));
+  }
+
+ protected:
+  AbstractVariantData() = default;
+  AbstractVariantData(const AbstractVariantData&) = default;
+  AbstractVariantData(AbstractVariantData&&) = delete;
+};
+
+// Analogous to `TfLiteTensorRealloc` for allocation of tensors whose
+// data member points to an arbitrary C++ object. `VariantType` refers
+// to the erased type of said object and `VariantArgs` refers to
+// a list of argument types with which to construct a new `VariantType`.
+// `VariantArgs` must match a constructor of `VariantType`.
+template <class VariantType, class... VariantArgs>
+TfLiteStatus TfLiteTensorVariantRealloc(TfLiteTensor* t,
+                                        VariantArgs&&... args) {
+  if (t->type != kTfLiteVariant) return kTfLiteError;
+  VariantType* new_vd;
+  if (t->data.raw != nullptr) {
+    auto* target_vd = static_cast<VariantData*>(t->data.data);
+    target_vd->~VariantData();
+    // As above, we assume if `t` is already allocated then it was allocated
+    // with the same `VariantType` as templated.
+    new_vd = new (t->data.raw) VariantType(std::forward<VariantArgs>(args)...);
+  } else {
+    new_vd = new VariantType(std::forward<VariantArgs>(args)...);
+  }
+  t->data.data = static_cast<VariantData*>(new_vd);
+  t->allocation_type = kTfLiteVariantObject;
+  return kTfLiteOk;
+}
+
 #endif  // __cplusplus
 #endif  // TENSORFLOW_LITE_CORE_C_COMMON_H_
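Putting the variant pieces together, here is a hedged sketch of a custom payload type built on the CRTP helper above. The `TensorList` name and its `std::vector<int>` payload are illustrative stand-ins, not types from this diff:

```cpp
#include <vector>

#include "tensorflow/lite/core/c/common.h"

// Hedged sketch: a custom variant payload. Deriving from
// AbstractVariantData<TensorList> gives it the type-erased CloneTo.
class TensorList : public AbstractVariantData<TensorList> {
 public:
  explicit TensorList(int reserved) { items_.reserve(reserved); }
  std::vector<int>& items() { return items_; }

 private:
  std::vector<int> items_;  // stand-in payload for illustration
};

// Writing: construct the variant object behind a kTfLiteVariant tensor.
//   TfLiteTensor* output = ...;
//   TfLiteTensorVariantRealloc<TensorList>(output, /*reserved=*/8);
//
// Reading: static_cast through VariantData first, as the header requires.
//   auto* vd = static_cast<VariantData*>(output->data.data);
//   auto* list = static_cast<TensorList*>(vd);
```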
tensorflow/lite/core/macros.h (new file):

@@ -0,0 +1,78 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+// This provides utility macros and functions that are inherently platform
+// specific or shared across runtime & converter.
+#ifndef TENSORFLOW_LITE_CORE_MACROS_H_
+#define TENSORFLOW_LITE_CORE_MACROS_H_
+
+#ifdef __has_builtin
+#define TFLITE_HAS_BUILTIN(x) __has_builtin(x)
+#else
+#define TFLITE_HAS_BUILTIN(x) 0
+#endif
+
+#if (!defined(__NVCC__)) && (TFLITE_HAS_BUILTIN(__builtin_expect) || \
+                             (defined(__GNUC__) && __GNUC__ >= 3))
+#define TFLITE_EXPECT_FALSE(cond) __builtin_expect(cond, false)
+#define TFLITE_EXPECT_TRUE(cond) __builtin_expect(!!(cond), true)
+#else
+#define TFLITE_EXPECT_FALSE(cond) (cond)
+#define TFLITE_EXPECT_TRUE(cond) (cond)
+#endif
+
+#ifdef _WIN32
+#define TFLITE_NOINLINE __declspec(noinline)
+#else
+#ifdef __has_attribute
+#if __has_attribute(noinline)
+#define TFLITE_NOINLINE __attribute__((noinline))
+#else
+#define TFLITE_NOINLINE
+#endif  // __has_attribute(noinline)
+#else
+#define TFLITE_NOINLINE
+#endif  // __has_attribute
+#endif  // _WIN32
+
+// Normally we'd use ABSL_HAVE_ATTRIBUTE_WEAK and ABSL_ATTRIBUTE_WEAK, but
+// we avoid the absl dependency for binary size reasons.
+#ifdef __has_attribute
+#define TFLITE_HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define TFLITE_HAS_ATTRIBUTE(x) 0
+#endif
+
+#if (TFLITE_HAS_ATTRIBUTE(weak) ||                   \
+     (defined(__GNUC__) && !defined(__clang__))) &&  \
+    !(defined(__llvm__) && defined(_WIN32)) && !defined(__MINGW32__)
+#undef TFLITE_ATTRIBUTE_WEAK
+#define TFLITE_ATTRIBUTE_WEAK __attribute__((weak))
+#define TFLITE_HAS_ATTRIBUTE_WEAK 1
+#else
+#define TFLITE_ATTRIBUTE_WEAK
+#define TFLITE_HAS_ATTRIBUTE_WEAK 0
+#endif
+
+#ifndef TF_LITE_STATIC_MEMORY
+// maximum size of a valid flatbuffer
+inline constexpr unsigned int flatbuffer_size_max = 2147483648;
+// If non-zero then the buffer is stored outside of the flatbuffers, string
+inline constexpr char tflite_metadata_buffer_location[] = "buffer_location";
+// field for minimum runtime version, string
+inline constexpr char tflite_metadata_min_runtime_version[] =
+    "min_runtime_version";
+#endif
+
+#endif  // TENSORFLOW_LITE_CORE_MACROS_H_
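A hedged sketch of how these macros degrade gracefully across compilers (the function names are illustrative). On toolchains without the builtins or attributes, both macros expand to no-ops, so the same source stays portable:

```cpp
#include "tensorflow/lite/core/macros.h"

// Kept out of hot-path inlining; expands to nothing where unsupported.
TFLITE_NOINLINE int SlowPathLookup(int key);

int Lookup(int key, int cached_key, int cached_value) {
  if (TFLITE_EXPECT_TRUE(key == cached_key)) {
    return cached_value;  // branch hinted as the common case
  }
  return SlowPathLookup(key);
}
```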
tensorflow/lite/kernels/internal/common.h:

@@ -16,6 +16,10 @@ limitations under the License.
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_COMMON_H_
 
 #include <algorithm>
+#include <cstddef>
+#include <cstdint>
+
+#include "tensorflow/lite/kernels/internal/runtime_shape.h"
 #ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
 #ifdef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
 #define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
@@ -26,6 +30,7 @@ limitations under the License.
 #include <functional>
 
 #include "fixedpoint/fixedpoint.h"
+#include "tensorflow/lite/core/macros.h"
 #include "tensorflow/lite/kernels/internal/cppmath.h"
 #include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
 #include "tensorflow/lite/kernels/internal/types.h"
@@ -34,6 +39,117 @@ namespace tflite {
 
 constexpr int kReverseShift = -1;
 
+// Reduces and compresses dimensions so that broadcast handling becomes more
+// efficient. Returns true if the output shape is broadcastable; it doesn't
+// contain any degenerate dimension, i.e. shape dimension = 0. False otherwise.
+template <int MAX_DIM = 6>
+bool ReduceDimensionsForBroadcast(const RuntimeShape& input1_shape,
+                                  const RuntimeShape& input2_shape,
+                                  size_t* compressed_input1_stride,
+                                  size_t* compressed_input2_stride,
+                                  size_t* compressed_output_shape) {
+  size_t num_compressed_dims = 0;
+  size_t compressed_input1_shape[MAX_DIM];
+  size_t compressed_input2_shape[MAX_DIM];
+  std::fill(compressed_input1_shape, compressed_input1_shape + MAX_DIM, 1);
+  std::fill(compressed_input2_shape, compressed_input2_shape + MAX_DIM, 1);
+  std::fill(compressed_output_shape, compressed_output_shape + MAX_DIM, 1);
+  bool broadcast_input1 = false;
+  bool broadcast_input2 = false;
+  bool first_nonunit = true;
+  const size_t num_input1_dims = input1_shape.DimensionsCount();
+  const size_t num_input2_dims = input2_shape.DimensionsCount();
+  const int32_t* input1_dims = input1_shape.DimsData();
+  const int32_t* input2_dims = input2_shape.DimsData();
+  const size_t num_common_dims = std::min(num_input1_dims, num_input2_dims);
+  for (size_t i = 1; i <= num_common_dims; i++) {
+    const size_t input1_dim = input1_dims[num_input1_dims - i];
+    const size_t input2_dim = input2_dims[num_input2_dims - i];
+    if (input1_dim == 0 || input2_dim == 0) {
+      return false;
+    }
+    if (input1_dim == 1 && input2_dim == 1) {
+      continue;
+    }
+    assert(!broadcast_input1 || !broadcast_input2);
+
+    if (input1_dim == 1) {
+      if (!broadcast_input1) {
+        broadcast_input1 = true;
+        broadcast_input2 = false;
+        num_compressed_dims++;
+      }
+      compressed_input2_shape[num_compressed_dims - 1] *= input2_dim;
+      compressed_output_shape[num_compressed_dims - 1] *= input2_dim;
+    } else if (input2_dim == 1) {
+      if (!broadcast_input2) {
+        broadcast_input1 = false;
+        broadcast_input2 = true;
+        num_compressed_dims++;
+      }
+      compressed_input1_shape[num_compressed_dims - 1] *= input1_dim;
+      compressed_output_shape[num_compressed_dims - 1] *= input1_dim;
+    } else {
+      TFLITE_DCHECK(input1_dim == input2_dim);
+      if (broadcast_input1 || broadcast_input2 || first_nonunit) {
+        broadcast_input1 = false;
+        broadcast_input2 = false;
+        num_compressed_dims++;
+      }
+      compressed_input1_shape[num_compressed_dims - 1] *= input1_dim;
+      compressed_input2_shape[num_compressed_dims - 1] *= input1_dim;
+      compressed_output_shape[num_compressed_dims - 1] *= input1_dim;
+    }
+    first_nonunit = false;
+  }
+  if (num_input1_dims > num_input2_dims) {
+    if (!broadcast_input2) {
+      num_compressed_dims++;
+    }
+    for (size_t i = 0; i < num_input1_dims - num_input2_dims; i++) {
+      const size_t input1_dim = input1_dims[i];
+      if (input1_dim == 0) {
+        return false;
+      }
+      compressed_input1_shape[num_compressed_dims - 1] *= input1_dim;
+      compressed_output_shape[num_compressed_dims - 1] *= input1_dim;
+    }
+  } else if (num_input2_dims > num_input1_dims) {
+    if (!broadcast_input1) {
+      num_compressed_dims++;
+    }
+    for (size_t i = 0; i < num_input2_dims - num_input1_dims; i++) {
+      const size_t input2_dim = input2_dims[i];
+      if (input2_dim == 0) {
+        return false;
+      }
+      compressed_input2_shape[num_compressed_dims - 1] *= input2_dim;
+      compressed_output_shape[num_compressed_dims - 1] *= input2_dim;
+    }
+  }
+  num_compressed_dims = (num_compressed_dims > 1) ? num_compressed_dims : 1;
+
+  int input1_stride = 1;
+  int input2_stride = 1;
+  for (int i = 0; i < MAX_DIM; ++i) {
+    compressed_input1_stride[i] = input1_stride;
+    input1_stride *= compressed_input1_shape[i];
+    compressed_input2_stride[i] = input2_stride;
+    input2_stride *= compressed_input2_shape[i];
+  }
+  for (int i = 0; i < MAX_DIM; ++i) {
+    if (compressed_input1_shape[i] != compressed_input2_shape[i]) {
+      if (compressed_input1_shape[i] == 1) {
+        compressed_input1_stride[i] = 0;
+      } else {
+        TFLITE_DCHECK_EQ(compressed_input2_shape[i], 1);
+        compressed_input2_stride[i] = 0;
+      }
+    }
+  }
+  return true;
+}
+
 inline void GetActivationMinMax(FusedActivationFunctionType ac,
                                 float* output_activation_min,
                                 float* output_activation_max) {
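A hedged usage sketch of the new helper: adjacent dimensions with the same broadcast pattern collapse into a single run, so the elementwise loop sees at most `MAX_DIM` compressed axes. The shapes below are arbitrary examples, for instance `{2, 3, 1, 1}` against `{2, 3, 4, 5}` compresses to one non-broadcast run (2·3) and one broadcast run (4·5):

```cpp
#include "tensorflow/lite/kernels/internal/common.h"

void DemoBroadcastCompression() {
  const tflite::RuntimeShape shape1({2, 3, 1, 1});
  const tflite::RuntimeShape shape2({2, 3, 4, 5});
  size_t stride1[6], stride2[6], out_shape[6];
  if (tflite::ReduceDimensionsForBroadcast<6>(shape1, shape2, stride1,
                                              stride2, out_shape)) {
    // A zero stride marks an axis on which that input is broadcast; both
    // inputs can then be walked with plain strided indexing.
  }  // false would mean a degenerate (zero-sized) dimension was found.
}
```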
@@ -250,42 +366,11 @@ inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(
       quantized_multiplier);
 }
 
-inline int32_t MultiplyByQuantizedMultiplier(int32_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  using gemmlowp::RoundingDivideByPOT;
-  using gemmlowp::SaturatingRoundingDoublingHighMul;
-  int left_shift = shift > 0 ? shift : 0;
-  int right_shift = shift > 0 ? 0 : -shift;
-  return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(
-                                 x * (1 << left_shift), quantized_multiplier),
-                             right_shift);
-}
+TFLITE_NOINLINE int32_t MultiplyByQuantizedMultiplier(
+    int32_t x, int32_t quantized_multiplier, int shift);
 
-inline int32_t MultiplyByQuantizedMultiplier(int64_t x,
-                                             int32_t quantized_multiplier,
-                                             int shift) {
-  // Inputs:
-  // - quantized_multiplier has fixed point at bit 31
-  // - shift is -31 to +7 (negative for right shift)
-  //
-  // Assumptions: The following input ranges are assumed
-  // - quantize_scale>=0  (the usual range is (1<<30) to (1>>31)-1)
-  // - scaling is chosen so final scaled result fits in int32_t
-  // - input x is in the range -(1<<47) <= x < (1<<47)
-  assert(quantized_multiplier >= 0);
-  assert(shift >= -31 && shift < 8);
-  assert(x >= -(static_cast<int64_t>(1) << 47) &&
-         x < (static_cast<int64_t>(1) << 47));
-
-  int32_t reduced_multiplier = (quantized_multiplier < 0x7FFF0000)
-                                   ? ((quantized_multiplier + (1 << 15)) >> 16)
-                                   : 0x7FFF;
-  int total_shift = 15 - shift;
-  x = (x * (int64_t)reduced_multiplier) + ((int64_t)1 << (total_shift - 1));
-  int32_t result = x >> total_shift;
-  return result;
-}
+TFLITE_NOINLINE int32_t MultiplyByQuantizedMultiplier(
+    int64_t x, int32_t quantized_multiplier, int shift);
 
 #ifdef USE_NEON
 // Round uses ARM's rounding shift right.
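For readers unfamiliar with the fixed-point convention here: both overloads compute, in effect, round(x · M · 2^shift / 2^31), where M is a Q31 multiplier; the change above only moves the definitions out of line (under `TFLITE_NOINLINE`) rather than altering that arithmetic. A hedged reference sketch for intuition only, ignoring the saturation handling of the real implementation:

```cpp
#include <cstdint>

// Naive model of MultiplyByQuantizedMultiplier: round(x * m * 2^shift / 2^31)
// for shift <= 30. Not the library function; an intuition aid only.
int32_t NaiveMultiplyByQuantizedMultiplier(int32_t x, int32_t m, int shift) {
  const int64_t prod = static_cast<int64_t>(x) * m;       // Q31 product
  const int total_shift = 31 - shift;                     // fold in the shift
  const int64_t round = int64_t{1} << (total_shift - 1);  // round-to-nearest
  return static_cast<int32_t>((prod + round) >> total_shift);
}
```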
@@ -328,14 +413,16 @@ template <typename T>
 int CountLeadingZeros(T integer_input) {
   static_assert(std::is_unsigned<T>::value,
                 "Only unsigned integer types handled.");
-#if defined(__GNUC__)
-  return integer_input ? __builtin_clz(integer_input)
-                       : std::numeric_limits<T>::digits;
-#else
   if (integer_input == 0) {
     return std::numeric_limits<T>::digits;
   }
-
+#if defined(__GNUC__)
+  if (std::is_same<T, uint32_t>::value) {
+    return __builtin_clz(integer_input);
+  } else if (std::is_same<T, uint64_t>::value) {
+    return __builtin_clzll(integer_input);
+  }
+#endif
   const T one_in_leading_positive = static_cast<T>(1)
                                     << (std::numeric_limits<T>::digits - 1);
   int leading_zeros = 0;
@@ -344,7 +431,6 @@ int CountLeadingZeros(T integer_input) {
     ++leading_zeros;
   }
   return leading_zeros;
-#endif
 }
 
 template <typename T>
@@ -1039,8 +1125,8 @@ inline void NdArrayDescsForElementwiseBroadcast(const Dims<N>& input0_dims,
 
 // Copies dims to desc, calculating strides.
 template <int N>
-inline void CopyDimsToDesc(const RuntimeShape& input_shape,
-                           NdArrayDesc<N>* desc_out) {
+TFLITE_NOINLINE void CopyDimsToDesc(const RuntimeShape& input_shape,
+                                    NdArrayDesc<N>* desc_out) {
   int desc_stride = 1;
   for (int i = N - 1; i >= 0; --i) {
     desc_out->extents[i] = input_shape.Dims(i);