xmos-ai-tools 1.3.2.dev80__py3-none-macosx_10_15_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (395) hide show
  1. xmos_ai_tools/__init__.py +7 -0
  2. xmos_ai_tools/io_server/__init__.py +151 -0
  3. xmos_ai_tools/runtime/__init__.py +0 -0
  4. xmos_ai_tools/runtime/buildfiles/aitoolslib.cmake +13 -0
  5. xmos_ai_tools/runtime/buildfiles/aitoolslib.make +8 -0
  6. xmos_ai_tools/runtime/include/flash_server.h +74 -0
  7. xmos_ai_tools/runtime/include/flatbuffers/allocator.h +68 -0
  8. xmos_ai_tools/runtime/include/flatbuffers/array.h +243 -0
  9. xmos_ai_tools/runtime/include/flatbuffers/base.h +474 -0
  10. xmos_ai_tools/runtime/include/flatbuffers/bfbs_generator.h +43 -0
  11. xmos_ai_tools/runtime/include/flatbuffers/buffer.h +142 -0
  12. xmos_ai_tools/runtime/include/flatbuffers/buffer_ref.h +53 -0
  13. xmos_ai_tools/runtime/include/flatbuffers/code_generators.h +235 -0
  14. xmos_ai_tools/runtime/include/flatbuffers/default_allocator.h +64 -0
  15. xmos_ai_tools/runtime/include/flatbuffers/detached_buffer.h +114 -0
  16. xmos_ai_tools/runtime/include/flatbuffers/flatbuffer_builder.h +1197 -0
  17. xmos_ai_tools/runtime/include/flatbuffers/flatbuffers.h +270 -0
  18. xmos_ai_tools/runtime/include/flatbuffers/flatc.h +111 -0
  19. xmos_ai_tools/runtime/include/flatbuffers/flexbuffers.h +1897 -0
  20. xmos_ai_tools/runtime/include/flatbuffers/grpc.h +300 -0
  21. xmos_ai_tools/runtime/include/flatbuffers/hash.h +127 -0
  22. xmos_ai_tools/runtime/include/flatbuffers/idl.h +1232 -0
  23. xmos_ai_tools/runtime/include/flatbuffers/minireflect.h +419 -0
  24. xmos_ai_tools/runtime/include/flatbuffers/pch/flatc_pch.h +39 -0
  25. xmos_ai_tools/runtime/include/flatbuffers/pch/pch.h +38 -0
  26. xmos_ai_tools/runtime/include/flatbuffers/reflection.h +502 -0
  27. xmos_ai_tools/runtime/include/flatbuffers/reflection_generated.h +1449 -0
  28. xmos_ai_tools/runtime/include/flatbuffers/registry.h +128 -0
  29. xmos_ai_tools/runtime/include/flatbuffers/stl_emulation.h +509 -0
  30. xmos_ai_tools/runtime/include/flatbuffers/string.h +64 -0
  31. xmos_ai_tools/runtime/include/flatbuffers/struct.h +53 -0
  32. xmos_ai_tools/runtime/include/flatbuffers/table.h +168 -0
  33. xmos_ai_tools/runtime/include/flatbuffers/util.h +690 -0
  34. xmos_ai_tools/runtime/include/flatbuffers/vector.h +370 -0
  35. xmos_ai_tools/runtime/include/flatbuffers/vector_downward.h +271 -0
  36. xmos_ai_tools/runtime/include/flatbuffers/verifier.h +283 -0
  37. xmos_ai_tools/runtime/include/ioserver.h +44 -0
  38. xmos_ai_tools/runtime/include/lib_nn/api/TransposeConv.h +24 -0
  39. xmos_ai_tools/runtime/include/lib_nn/api/add_int16.h +27 -0
  40. xmos_ai_tools/runtime/include/lib_nn/api/add_int16_transform.h +42 -0
  41. xmos_ai_tools/runtime/include/lib_nn/api/dequantize_int16.h +22 -0
  42. xmos_ai_tools/runtime/include/lib_nn/api/dequantize_int16_transform.h +34 -0
  43. xmos_ai_tools/runtime/include/lib_nn/api/expand_8_to_16.h +8 -0
  44. xmos_ai_tools/runtime/include/lib_nn/api/multiply_int16.h +42 -0
  45. xmos_ai_tools/runtime/include/lib_nn/api/multiply_int16_transform.h +71 -0
  46. xmos_ai_tools/runtime/include/lib_nn/api/nn_api.h +15 -0
  47. xmos_ai_tools/runtime/include/lib_nn/api/nn_bin_types.h +14 -0
  48. xmos_ai_tools/runtime/include/lib_nn/api/nn_config.h +287 -0
  49. xmos_ai_tools/runtime/include/lib_nn/api/nn_conv2d_structs.h +72 -0
  50. xmos_ai_tools/runtime/include/lib_nn/api/nn_image.h +26 -0
  51. xmos_ai_tools/runtime/include/lib_nn/api/nn_layers.h +303 -0
  52. xmos_ai_tools/runtime/include/lib_nn/api/nn_op_helper.h +132 -0
  53. xmos_ai_tools/runtime/include/lib_nn/api/nn_op_utils.h +150 -0
  54. xmos_ai_tools/runtime/include/lib_nn/api/nn_operator.h +18 -0
  55. xmos_ai_tools/runtime/include/lib_nn/api/nn_pooling.h +551 -0
  56. xmos_ai_tools/runtime/include/lib_nn/api/nn_types.h +83 -0
  57. xmos_ai_tools/runtime/include/lib_nn/api/nn_window_params.h +55 -0
  58. xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16.h +54 -0
  59. xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16_kernel_transform.h +37 -0
  60. xmos_ai_tools/runtime/include/lib_nn/api/output_transform_fn_int16_mappings.h +13 -0
  61. xmos_ai_tools/runtime/include/lib_nn/api/quadratic_approximation.h +82 -0
  62. xmos_ai_tools/runtime/include/lib_nn/api/quadratic_interpolation.h +23 -0
  63. xmos_ai_tools/runtime/include/lib_nn/api/quantize_int16.h +22 -0
  64. xmos_ai_tools/runtime/include/lib_nn/api/quantize_int16_transform.h +33 -0
  65. xmos_ai_tools/runtime/include/lib_nn/api/version.h +13 -0
  66. xmos_ai_tools/runtime/include/lib_nn/api/vpu_memmove_word_aligned.h +15 -0
  67. xmos_ai_tools/runtime/include/lib_nn/api/vpu_memset_256.h +55 -0
  68. xmos_ai_tools/runtime/include/lib_nn/api/vpu_sim.h +118 -0
  69. xmos_ai_tools/runtime/include/lib_nn/api/xs3_vpu.h +216 -0
  70. xmos_ai_tools/runtime/include/lib_nn/api/xs3a_registers.h +2869 -0
  71. xmos_ai_tools/runtime/include/lib_nn/src/asm/asm_constants.h +41 -0
  72. xmos_ai_tools/runtime/include/lib_nn/src/asm/window_op_plan.h +25 -0
  73. xmos_ai_tools/runtime/include/lib_tflite_micro/api/fast_flash.h +47 -0
  74. xmos_ai_tools/runtime/include/lib_tflite_micro/api/inference_engine.h +218 -0
  75. xmos_ai_tools/runtime/include/lib_tflite_micro/api/memory_parallel_transport.h +52 -0
  76. xmos_ai_tools/runtime/include/lib_tflite_micro/api/version.h +13 -0
  77. xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_config.h +17 -0
  78. xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_device_memory.h +62 -0
  79. xmos_ai_tools/runtime/include/lib_tflite_micro/api/xcore_shared_config.h +31 -0
  80. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/conv2d_float.h +155 -0
  81. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_common.h +19 -0
  82. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_custom_options.h +28 -0
  83. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_error_reporter.h +32 -0
  84. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_interpreter.h +49 -0
  85. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_ops.h +71 -0
  86. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_profiler.h +49 -0
  87. xmos_ai_tools/runtime/include/lib_tflite_micro/src/tflite-xcore-kernels/xcore_utils.h +160 -0
  88. xmos_ai_tools/runtime/include/lib_tflite_micro/src/thread_call.h +119 -0
  89. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_defs.h +4 -0
  90. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_device.h +4 -0
  91. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_std_descriptors.h +4 -0
  92. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/legacy/usb_std_requests.h +4 -0
  93. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud.h +518 -0
  94. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_conf_default.h +11 -0
  95. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_device.h +87 -0
  96. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_std_descriptors.h +191 -0
  97. xmos_ai_tools/runtime/include/lib_xud/lib_xud/api/xud_std_requests.h +120 -0
  98. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/XUD_USB_Defines.h +70 -0
  99. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/hid.h +23 -0
  100. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudio10.h +30 -0
  101. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudio20.h +357 -0
  102. xmos_ai_tools/runtime/include/lib_xud/lib_xud/src/user/class/usbaudiocommon.h +168 -0
  103. xmos_ai_tools/runtime/include/signal/micro/kernels/delay_flexbuffers_generated_data.h +25 -0
  104. xmos_ai_tools/runtime/include/signal/micro/kernels/energy_flexbuffers_generated_data.h +28 -0
  105. xmos_ai_tools/runtime/include/signal/micro/kernels/fft_flexbuffers_generated_data.h +37 -0
  106. xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_flexbuffers_generated_data.h +25 -0
  107. xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_log_flexbuffers_generated_data.h +27 -0
  108. xmos_ai_tools/runtime/include/signal/micro/kernels/filter_bank_spectral_subtraction_flexbuffers_generated_data.h +26 -0
  109. xmos_ai_tools/runtime/include/signal/micro/kernels/framer_flexbuffers_generated_data.h +25 -0
  110. xmos_ai_tools/runtime/include/signal/micro/kernels/irfft.h +31 -0
  111. xmos_ai_tools/runtime/include/signal/micro/kernels/overlap_add_flexbuffers_generated_data.h +25 -0
  112. xmos_ai_tools/runtime/include/signal/micro/kernels/pcan_flexbuffers_generated_data.h +7 -0
  113. xmos_ai_tools/runtime/include/signal/micro/kernels/rfft.h +31 -0
  114. xmos_ai_tools/runtime/include/signal/micro/kernels/stacker_flexbuffers_generated_data.h +25 -0
  115. xmos_ai_tools/runtime/include/signal/micro/kernels/window_flexbuffers_generated_data.h +25 -0
  116. xmos_ai_tools/runtime/include/signal/src/circular_buffer.h +118 -0
  117. xmos_ai_tools/runtime/include/signal/src/complex.h +29 -0
  118. xmos_ai_tools/runtime/include/signal/src/energy.h +38 -0
  119. xmos_ai_tools/runtime/include/signal/src/fft_auto_scale.h +35 -0
  120. xmos_ai_tools/runtime/include/signal/src/filter_bank.h +69 -0
  121. xmos_ai_tools/runtime/include/signal/src/filter_bank_log.h +38 -0
  122. xmos_ai_tools/runtime/include/signal/src/filter_bank_spectral_subtraction.h +73 -0
  123. xmos_ai_tools/runtime/include/signal/src/filter_bank_square_root.h +34 -0
  124. xmos_ai_tools/runtime/include/signal/src/irfft.h +84 -0
  125. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_common.h +49 -0
  126. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_float.h +31 -0
  127. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_int16.h +30 -0
  128. xmos_ai_tools/runtime/include/signal/src/kiss_fft_wrappers/kiss_fft_int32.h +31 -0
  129. xmos_ai_tools/runtime/include/signal/src/log.h +30 -0
  130. xmos_ai_tools/runtime/include/signal/src/max_abs.h +31 -0
  131. xmos_ai_tools/runtime/include/signal/src/msb.h +32 -0
  132. xmos_ai_tools/runtime/include/signal/src/overlap_add.h +46 -0
  133. xmos_ai_tools/runtime/include/signal/src/pcan_argc_fixed.h +41 -0
  134. xmos_ai_tools/runtime/include/signal/src/rfft.h +85 -0
  135. xmos_ai_tools/runtime/include/signal/src/square_root.h +32 -0
  136. xmos_ai_tools/runtime/include/signal/src/window.h +31 -0
  137. xmos_ai_tools/runtime/include/signal/testdata/fft_test_data.h +48 -0
  138. xmos_ai_tools/runtime/include/tensorflow/lite/array.h +156 -0
  139. xmos_ai_tools/runtime/include/tensorflow/lite/builtin_op_data.h +22 -0
  140. xmos_ai_tools/runtime/include/tensorflow/lite/builtin_ops.h +241 -0
  141. xmos_ai_tools/runtime/include/tensorflow/lite/c/builtin_op_data.h +20 -0
  142. xmos_ai_tools/runtime/include/tensorflow/lite/c/c_api_types.h +26 -0
  143. xmos_ai_tools/runtime/include/tensorflow/lite/c/common.h +30 -0
  144. xmos_ai_tools/runtime/include/tensorflow/lite/context_util.h +54 -0
  145. xmos_ai_tools/runtime/include/tensorflow/lite/core/api/error_reporter.h +72 -0
  146. xmos_ai_tools/runtime/include/tensorflow/lite/core/api/flatbuffer_conversions.h +440 -0
  147. xmos_ai_tools/runtime/include/tensorflow/lite/core/api/tensor_utils.h +28 -0
  148. xmos_ai_tools/runtime/include/tensorflow/lite/core/c/builtin_op_data.h +626 -0
  149. xmos_ai_tools/runtime/include/tensorflow/lite/core/c/c_api_types.h +178 -0
  150. xmos_ai_tools/runtime/include/tensorflow/lite/core/c/common.h +1496 -0
  151. xmos_ai_tools/runtime/include/tensorflow/lite/core/macros.h +78 -0
  152. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/bits.h +102 -0
  153. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft.h +50 -0
  154. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft_io.h +34 -0
  155. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/fft_util.h +34 -0
  156. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank.h +63 -0
  157. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank_io.h +35 -0
  158. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/filterbank_util.h +50 -0
  159. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend.h +64 -0
  160. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend_io.h +31 -0
  161. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/frontend_util.h +52 -0
  162. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_common.h +48 -0
  163. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/kiss_fft_int16.h +33 -0
  164. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_lut.h +40 -0
  165. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale.h +39 -0
  166. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale_io.h +33 -0
  167. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/log_scale_util.h +45 -0
  168. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction.h +46 -0
  169. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_io.h +36 -0
  170. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_util.h +50 -0
  171. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.h +47 -0
  172. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.h +57 -0
  173. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window.h +49 -0
  174. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window_io.h +34 -0
  175. xmos_ai_tools/runtime/include/tensorflow/lite/experimental/microfrontend/lib/window_util.h +45 -0
  176. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/common.h +1358 -0
  177. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/compatibility.h +122 -0
  178. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/cppmath.h +40 -0
  179. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/max.h +35 -0
  180. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/min.h +35 -0
  181. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/optimized/neon_check.h +20 -0
  182. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/portable_tensor.h +141 -0
  183. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/portable_tensor_utils.h +623 -0
  184. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/quantization_util.h +292 -0
  185. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/add.h +561 -0
  186. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/add_n.h +86 -0
  187. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/arg_min_max.h +88 -0
  188. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/batch_matmul.h +275 -0
  189. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h +101 -0
  190. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/binary_function.h +91 -0
  191. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/broadcast_args.h +56 -0
  192. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/broadcast_to.h +97 -0
  193. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/ceil.h +37 -0
  194. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/comparisons.h +271 -0
  195. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/concatenation.h +141 -0
  196. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/conv.h +289 -0
  197. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/cumsum.h +175 -0
  198. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depth_to_space.h +79 -0
  199. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h +100 -0
  200. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h +319 -0
  201. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/dequantize.h +78 -0
  202. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/div.h +247 -0
  203. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/elu.h +37 -0
  204. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/exp.h +38 -0
  205. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/fill.h +38 -0
  206. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor.h +39 -0
  207. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor_div.h +35 -0
  208. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/floor_mod.h +44 -0
  209. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/fully_connected.h +323 -0
  210. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/hard_swish.h +168 -0
  211. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/add.h +250 -0
  212. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/conv.h +241 -0
  213. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h +291 -0
  214. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h +126 -0
  215. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/l2normalization.h +67 -0
  216. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h +121 -0
  217. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h +18 -0
  218. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/mul.h +194 -0
  219. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h +264 -0
  220. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h +117 -0
  221. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/integer_ops/transpose_conv.h +224 -0
  222. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/l2normalization.h +90 -0
  223. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/leaky_relu.h +69 -0
  224. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/log_softmax.h +256 -0
  225. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/logistic.h +132 -0
  226. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/lstm_cell.h +422 -0
  227. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/maximum_minimum.h +64 -0
  228. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/mul.h +267 -0
  229. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/neg.h +37 -0
  230. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/pad.h +169 -0
  231. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/pooling.h +303 -0
  232. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.h +333 -0
  233. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h +244 -0
  234. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/prelu.h +111 -0
  235. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h +140 -0
  236. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/quantize.h +89 -0
  237. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/reduce.h +491 -0
  238. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/requantize.h +70 -0
  239. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/resize_bilinear.h +233 -0
  240. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h +102 -0
  241. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/round.h +51 -0
  242. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/select.h +151 -0
  243. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/slice.h +80 -0
  244. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/softmax.h +233 -0
  245. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h +109 -0
  246. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/space_to_depth.h +80 -0
  247. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/strided_slice.h +147 -0
  248. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/sub.h +465 -0
  249. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/tanh.h +129 -0
  250. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/transpose.h +203 -0
  251. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/reference/transpose_conv.h +225 -0
  252. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/runtime_shape.h +168 -0
  253. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/strided_slice_logic.h +278 -0
  254. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/tensor_ctypes.h +42 -0
  255. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/internal/types.h +1096 -0
  256. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/kernel_util.h +341 -0
  257. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/op_macros.h +49 -0
  258. xmos_ai_tools/runtime/include/tensorflow/lite/kernels/padding.h +115 -0
  259. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/ibuffer_allocator.h +100 -0
  260. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/non_persistent_arena_buffer_allocator.h +104 -0
  261. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/persistent_arena_buffer_allocator.h +58 -0
  262. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/recording_single_arena_buffer_allocator.h +63 -0
  263. xmos_ai_tools/runtime/include/tensorflow/lite/micro/arena_allocator/single_arena_buffer_allocator.h +144 -0
  264. xmos_ai_tools/runtime/include/tensorflow/lite/micro/benchmarks/micro_benchmark.h +95 -0
  265. xmos_ai_tools/runtime/include/tensorflow/lite/micro/compatibility.h +32 -0
  266. xmos_ai_tools/runtime/include/tensorflow/lite/micro/cortex_m_generic/debug_log_callback.h +49 -0
  267. xmos_ai_tools/runtime/include/tensorflow/lite/micro/debug_log.h +38 -0
  268. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/micro_speech/micro_model_settings.h +37 -0
  269. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/expected_output_data.h +47 -0
  270. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/input_data.h +108 -0
  271. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/network_tester/network_model.h +166 -0
  272. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/detection_responder.h +32 -0
  273. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/image_provider.h +38 -0
  274. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/main_functions.h +37 -0
  275. xmos_ai_tools/runtime/include/tensorflow/lite/micro/examples/person_detection/model_settings.h +35 -0
  276. xmos_ai_tools/runtime/include/tensorflow/lite/micro/fake_micro_context.h +70 -0
  277. xmos_ai_tools/runtime/include/tensorflow/lite/micro/flatbuffer_utils.h +65 -0
  278. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/activation_utils.h +57 -0
  279. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/activations.h +64 -0
  280. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/add.h +78 -0
  281. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_function_specializations.h +141 -0
  282. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_interface.h +75 -0
  283. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_slicers.h +56 -0
  284. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/mli_tf_utils.h +310 -0
  285. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/scratch_buf_mgr.h +145 -0
  286. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/arc_mli/scratch_buffers.h +78 -0
  287. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/ceva_common.h +24 -0
  288. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/ceva_tflm_lib.h +613 -0
  289. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/mcps_macros.h +115 -0
  290. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ceva/types.h +1286 -0
  291. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/circular_buffer.h +45 -0
  292. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/circular_buffer_flexbuffers_generated_data.h +22 -0
  293. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/conv.h +117 -0
  294. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/conv_test.h +94 -0
  295. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/depthwise_conv.h +80 -0
  296. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/dequantize.h +38 -0
  297. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/detection_postprocess_flexbuffers_generated_data.h +25 -0
  298. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/ethosu.h +28 -0
  299. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/fully_connected.h +112 -0
  300. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/hard_swish.h +30 -0
  301. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/kernel_runner.h +86 -0
  302. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/kernel_util.h +150 -0
  303. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/leaky_relu.h +43 -0
  304. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/logical.h +35 -0
  305. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/logistic.h +42 -0
  306. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_eval.h +541 -0
  307. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_eval_test.h +817 -0
  308. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/lstm_shared.h +150 -0
  309. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/micro_ops.h +158 -0
  310. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/micro_tensor_utils.h +56 -0
  311. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/mul.h +74 -0
  312. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/pad.h +27 -0
  313. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/pooling.h +142 -0
  314. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/prelu.h +39 -0
  315. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/quantize.h +37 -0
  316. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/reduce.h +65 -0
  317. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/reshape.h +26 -0
  318. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/softmax.h +67 -0
  319. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/strided_slice.h +40 -0
  320. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/sub.h +60 -0
  321. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/svdf.h +100 -0
  322. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/testdata/conv_test_data.h +37 -0
  323. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/testdata/lstm_test_data.h +579 -0
  324. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/unidirectional_sequence_lstm.h +47 -0
  325. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/hifimini/fixedpoint_utils.h +139 -0
  326. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/lstm_eval.h +216 -0
  327. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/lstm_shared.h +78 -0
  328. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa.h +38 -0
  329. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_add.h +48 -0
  330. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_conv.h +89 -0
  331. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_depthwise_conv.h +74 -0
  332. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_fully_connected.h +78 -0
  333. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_pad.h +49 -0
  334. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_pooling.h +76 -0
  335. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_reduce.h +47 -0
  336. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_reshape.h +44 -0
  337. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_softmax.h +58 -0
  338. xmos_ai_tools/runtime/include/tensorflow/lite/micro/kernels/xtensa/xtensa_svdf.h +39 -0
  339. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_helpers.h +64 -0
  340. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/greedy_memory_planner.h +170 -0
  341. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/linear_memory_planner.h +53 -0
  342. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/memory_plan_struct.h +73 -0
  343. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/micro_memory_planner.h +95 -0
  344. xmos_ai_tools/runtime/include/tensorflow/lite/micro/memory_planner/non_persistent_buffer_planner_shim.h +133 -0
  345. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_allocation_info.h +138 -0
  346. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_allocator.h +351 -0
  347. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_arena_constants.h +28 -0
  348. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_common.h +38 -0
  349. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_context.h +176 -0
  350. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_graph.h +79 -0
  351. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter.h +189 -0
  352. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter_context.h +125 -0
  353. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_interpreter_graph.h +110 -0
  354. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_log.h +42 -0
  355. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_mutable_op_resolver.h +708 -0
  356. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_op_resolver.h +62 -0
  357. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_profiler.h +140 -0
  358. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_profiler_interface.h +38 -0
  359. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_resource_variable.h +89 -0
  360. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_time.h +36 -0
  361. xmos_ai_tools/runtime/include/tensorflow/lite/micro/micro_utils.h +162 -0
  362. xmos_ai_tools/runtime/include/tensorflow/lite/micro/mock_micro_graph.h +60 -0
  363. xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/interpreter/src/python_ops_resolver.h +21 -0
  364. xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/tflite_size/src/flatbuffer_size.h +30 -0
  365. xmos_ai_tools/runtime/include/tensorflow/lite/micro/python/tflite_size/src/flatbuffer_size_wrapper.h +33 -0
  366. xmos_ai_tools/runtime/include/tensorflow/lite/micro/recording_micro_allocator.h +125 -0
  367. xmos_ai_tools/runtime/include/tensorflow/lite/micro/recording_micro_interpreter.h +69 -0
  368. xmos_ai_tools/runtime/include/tensorflow/lite/micro/system_setup.h +27 -0
  369. xmos_ai_tools/runtime/include/tensorflow/lite/micro/test_helper_custom_ops.h +49 -0
  370. xmos_ai_tools/runtime/include/tensorflow/lite/micro/test_helpers.h +334 -0
  371. xmos_ai_tools/runtime/include/tensorflow/lite/micro/testing/micro_test.h +267 -0
  372. xmos_ai_tools/runtime/include/tensorflow/lite/micro/testing/test_conv_model.h +23 -0
  373. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tflite_bridge/flatbuffer_conversions_bridge.h +45 -0
  374. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tflite_bridge/micro_error_reporter.h +36 -0
  375. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/log_utils.h +273 -0
  376. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/metrics.h +41 -0
  377. xmos_ai_tools/runtime/include/tensorflow/lite/micro/tools/benchmarking/op_resolver.h +127 -0
  378. xmos_ai_tools/runtime/include/tensorflow/lite/portable_type_to_tflitetype.h +75 -0
  379. xmos_ai_tools/runtime/include/tensorflow/lite/schema/schema_generated.h +24644 -0
  380. xmos_ai_tools/runtime/include/tensorflow/lite/schema/schema_utils.h +33 -0
  381. xmos_ai_tools/runtime/include/tile_ram_server.h +38 -0
  382. xmos_ai_tools/runtime/lib/libhost_xtflitemicro.a +0 -0
  383. xmos_ai_tools/runtime/lib/libxtflitemicro.a +0 -0
  384. xmos_ai_tools/xformer/__init__.py +60 -0
  385. xmos_ai_tools/xformer/flash.py +190 -0
  386. xmos_ai_tools/xinterpreters/__init__.py +1 -0
  387. xmos_ai_tools/xinterpreters/exceptions.py +38 -0
  388. xmos_ai_tools/xinterpreters/host_interpreter.py +652 -0
  389. xmos_ai_tools/xinterpreters/libs/macos/xtflm_python.1.0.1.dylib +0 -0
  390. xmos_ai_tools/xinterpreters/libs/macos/xtflm_python.dylib +0 -0
  391. xmos_ai_tools-1.3.2.dev80.data/data/bin/xcore-opt +0 -0
  392. xmos_ai_tools-1.3.2.dev80.dist-info/METADATA +33 -0
  393. xmos_ai_tools-1.3.2.dev80.dist-info/RECORD +395 -0
  394. xmos_ai_tools-1.3.2.dev80.dist-info/WHEEL +5 -0
  395. xmos_ai_tools-1.3.2.dev80.dist-info/top_level.txt +1 -0
@@ -0,0 +1,132 @@
1
+ /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+ #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
16
+ #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
17
+
18
+ #include <cmath>
19
+
20
+ #include "fixedpoint/fixedpoint.h"
21
+ #include "tensorflow/lite/kernels/internal/common.h"
22
+ #include "tensorflow/lite/kernels/internal/cppmath.h"
23
+ #include "tensorflow/lite/kernels/internal/quantization_util.h"
24
+ #include "tensorflow/lite/kernels/internal/types.h"
25
+ #include "tensorflow/lite/kernels/op_macros.h"
26
+
27
+ namespace tflite_micro {
28
+ namespace reference_ops {
29
+
30
+ inline void Logistic(const RuntimeShape& input_shape, const float* input_data,
31
+ const RuntimeShape& output_shape, float* output_data) {
32
+ // Element-wise float logistic over the flattened tensors. The cutoffs below
33
+ // select an approximation instead of the exact formula. Rationale:
34
+ // 0. This approximation gives enough precision for float.
35
+ // 1. This works around an issue on an embedded chipset where exp() does not
36
+ // return correctly as expected - exp(x) should return inf when overflown,
37
+ // not 1.701417; IEEE 754 defines a representation for inf.
38
+ // 2. This speeds up the calculation and matches the behavior of the
39
+ // optimized kernels (check the definition of scalar_logistic_op<float>).
40
+ constexpr float kUpperCutoff = 16.619047164916992188f;
41
+ constexpr float kLowerCutoff = -9.f;
42
+ const int num_elements = MatchingFlatSize(input_shape, output_shape);
43
+ for (int idx = 0; idx < num_elements; ++idx) {
44
+ const float x = input_data[idx];
45
+ if (x > kUpperCutoff) {
46
+ // Large inputs: the output is taken to be exactly 1.
47
+ output_data[idx] = 1.0f;
48
+ continue;
49
+ }
50
+ if (x < kLowerCutoff) {
51
+ // Very negative inputs: exp(x) stands in for 1 / (1 + exp(-x)).
52
+ output_data[idx] = std::exp(x);
53
+ continue;
54
+ }
55
+ output_data[idx] = 1.f / (1.f + std::exp(-x));
56
+ }
57
+ }
58
+
59
+ // Convenience overload taking an (unused) LogisticParams so that, for example,
60
+ // generated-code calls can be uniform between data types.
61
+ inline void Logistic(const LogisticParams&, const RuntimeShape& input_shape,
62
+ const float* input_data, const RuntimeShape& output_shape,
63
+ float* output_data) {
64
+ // Drop params: the float kernel does not need them.
65
+ Logistic(input_shape, input_data, output_shape, output_data);
66
+ }
67
+
68
+ inline void Logistic(const LogisticParams& params,
69
+ const RuntimeShape& input_shape, const int16_t* input_data,
70
+ const RuntimeShape& output_shape, int16_t* output_data) {
71
+ // Fixed-point formats, both backed by int16:
72
+ // - InputFormat: 3 integer bits, range [-8, 8], the input range expected here.
73
+ // - ResultFormat: 0 integer bits, range [-1, 1], the return type of math
74
+ // functions such as tanh and logistic, whose range is in [-1, 1].
75
+ using InputFormat = gemmlowp::FixedPoint<std::int16_t, 3>;
76
+ using ResultFormat = gemmlowp::FixedPoint<std::int16_t, 0>;
77
+
78
+ const int num_elements = MatchingFlatSize(input_shape, output_shape);
79
+ for (int idx = 0; idx < num_elements; ++idx) {
80
+ // Wrap the raw int16 as fixed-point, evaluate, and store the raw result.
81
+ const ResultFormat sigmoid =
82
+ gemmlowp::logistic(InputFormat::FromRaw(input_data[idx]));
83
+ output_data[idx] = sigmoid.raw();
84
+ }
85
+ }
86
+
87
+ // Quantized int8_t logistic activation. Cheats by dequantizing, applying the
88
+ // floating point logistic method, and requantizing with saturation to the
89
+ // int8_t range. Slow on platforms without a floating point unit.
90
+
91
+ // TODO(b/141211002): Delete this int8_t implementation once we can reuse the
92
+ // approach used in TFLite for int8_t Logistic.
93
+ inline void Logistic(const RuntimeShape& input_shape, const int8_t* input_data,
94
+ float input_scale, int input_zero_point,
95
+ const RuntimeShape& output_shape, int8_t* output_data,
96
+ float output_scale, int output_zero_point) {
97
+ const float cutoff_upper = 16.619047164916992188f;
98
+ const float cutoff_lower = -9.f;
99
+
100
+ const int flat_size = MatchingFlatSize(input_shape, output_shape);
101
+
102
+ // Rationale for using approximation in reference kernel.
103
+ // 0. This approximation gives enough precision for float.
104
+ // 1. This works around an issue on an embedded chipset where exp() does not
105
+ // return correctly as expected - exp(x) should return inf when overflown,
106
+ // not 1.701417; IEEE 754 defines a representation for inf.
107
+ // 2. This will speed up calculation and is matching the behavior in the
108
+ // optimized kernels. (check the definition of scalar_logistic_op<float>)
109
+
110
+ for (int i = 0; i < flat_size; i++) {
111
+ // Dequantize.
112
+ float val =
113
+ static_cast<float>((input_data[i] - input_zero_point) * input_scale);
114
+ float result;
115
+ if (val > cutoff_upper) {
116
+ result = 1.0f;
117
+ } else if (val < cutoff_lower) {
118
+ result = std::exp(val);
119
+ } else {
120
+ result = 1.f / (1.f + std::exp(-val));
121
+ }
122
+ // Requantize, saturating to int8_t: e.g. a result of 1.0 with output_scale 1/256 and zero point -128 gives 128, which int8_t cannot hold.
123
+ const float unclamped = result / output_scale + output_zero_point;
124
+ const float requantized = unclamped > 127.f ? 127.f : (unclamped < -128.f ? -128.f : unclamped);
125
+ output_data[i] = static_cast<int8_t>(requantized);
126
+ }
127
+ }
128
+
129
+ } // namespace reference_ops
130
+ } // namespace tflite_micro
131
+
132
+ #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LOGISTIC_H_
@@ -0,0 +1,422 @@
1
+ /* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+ #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
16
+ #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
17
+
18
+ #include <algorithm>
19
+ #include <cmath>
20
+ #include <cstdint>
21
+
22
+ #include "tensorflow/lite/kernels/internal/common.h"
23
+ #include "tensorflow/lite/kernels/internal/reference/concatenation.h"
24
+ #include "tensorflow/lite/kernels/internal/reference/fully_connected.h"
25
+ #include "tensorflow/lite/kernels/internal/types.h"
26
+
27
+ namespace tflite_micro {
28
+ namespace reference_ops {
29
+
30
+ inline void LstmCell(
31
+ const LstmCellParams& params, const RuntimeShape& unextended_input_shape,
32
+ const float* input_data, const RuntimeShape& unextended_prev_activ_shape,
33
+ const float* prev_activ_data, const RuntimeShape& weights_shape,
34
+ const float* weights_data, const RuntimeShape& unextended_bias_shape,
35
+ const float* bias_data, const RuntimeShape& unextended_prev_state_shape,
36
+ const float* prev_state_data,
37
+ const RuntimeShape& unextended_output_state_shape, float* output_state_data,
38
+ const RuntimeShape& unextended_output_activ_shape, float* output_activ_data,
39
+ const RuntimeShape& unextended_concat_temp_shape, float* concat_temp_data,
40
+ const RuntimeShape& unextended_activ_temp_shape, float* activ_temp_data) {
41
+ TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
42
+ TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
43
+ TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
44
+ TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
45
+ TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
46
+ TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
47
+ TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
48
+ TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
49
+ const RuntimeShape input_shape =
50
+ RuntimeShape::ExtendedShape(4, unextended_input_shape);
51
+ const RuntimeShape prev_activ_shape =
52
+ RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
53
+ const RuntimeShape bias_shape =
54
+ RuntimeShape::ExtendedShape(4, unextended_bias_shape);
55
+ const RuntimeShape prev_state_shape =
56
+ RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
57
+ const RuntimeShape output_state_shape =
58
+ RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
59
+ const RuntimeShape output_activ_shape =
60
+ RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
61
+ const RuntimeShape concat_temp_shape =
62
+ RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
63
+ const RuntimeShape activ_temp_shape =
64
+ RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
65
+ TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
66
+
67
+ const int weights_dim_count = weights_shape.DimensionsCount();
68
+ const int batches =
69
+ MatchingDim(input_shape, 0, prev_activ_shape, 0, prev_state_shape, 0,
70
+ output_state_shape, 0, output_activ_shape, 0);
71
+ const int height =
72
+ MatchingDim(input_shape, 1, prev_activ_shape, 1, prev_state_shape, 1,
73
+ output_state_shape, 1, output_activ_shape, 1);
74
+ const int width =
75
+ MatchingDim(input_shape, 2, prev_activ_shape, 2, prev_state_shape, 2,
76
+ output_state_shape, 2, output_activ_shape, 2);
77
+ const int input_depth = input_shape.Dims(3);
78
+ const int prev_activ_depth = prev_activ_shape.Dims(3);
79
+ const int total_input_depth = prev_activ_depth + input_depth;
80
+ TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1),
81
+ total_input_depth);
82
+ TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
83
+ const int intern_activ_depth =
84
+ MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
85
+ TFLITE_DCHECK_EQ(weights_shape.FlatSize(),
86
+ intern_activ_depth * total_input_depth);
87
+ TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
88
+ const int output_depth =
89
+ MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
90
+ 3, output_activ_shape, 3);
91
+ TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
92
+
93
+ // Concatenate input and prev_activ data (in that order) along axis 3 into concat_temp
94
+ float const* concat_input_arrays_data[2] = {input_data, prev_activ_data};
95
+ const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape,
96
+ &prev_activ_shape};
97
+ tflite_micro::ConcatenationParams concat_params;
98
+ concat_params.axis = 3;
99
+ concat_params.inputs_count = 2;
100
+ Concatenation(concat_params, concat_input_arrays_shapes,
101
+ concat_input_arrays_data, concat_temp_shape, concat_temp_data);
102
+
103
+ // Fully connected over concat_temp into activ_temp, with activation min/max set to the float limits
104
+ tflite_micro::FullyConnectedParams fc_params;
105
+ fc_params.float_activation_min = std::numeric_limits<float>::lowest();
106
+ fc_params.float_activation_max = std::numeric_limits<float>::max();
107
+ FullyConnected(fc_params, concat_temp_shape, concat_temp_data, weights_shape,
108
+ weights_data, bias_shape, bias_data, activ_temp_shape,
109
+ activ_temp_data);
110
+
111
+ // Memory state update (the LSTM "guts"). Per position, activ_temp is read as four output_depth-wide slices: input gate, new input, forget gate, output gate.
112
+ for (int b = 0; b < batches; ++b) {
113
+ for (int w = 0; w < width; ++w) {
114
+ for (int h = 0; h < height; ++h) {
115
+ for (int c = 0; c < output_depth; ++c) {
116
+ const float input_gate =
117
+ 1.f /
118
+ (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
119
+ 0 * output_depth + c)]));
120
+ const float new_input = std::tanh(activ_temp_data[Offset(
121
+ activ_temp_shape, b, h, w, 1 * output_depth + c)]);
122
+ const float forget_gate =
123
+ 1.f /
124
+ (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
125
+ 2 * output_depth + c)]));
126
+ const float output_gate =
127
+ 1.f /
128
+ (1.f + std::exp(-activ_temp_data[Offset(activ_temp_shape, b, h, w,
129
+ 3 * output_depth + c)]));
130
+ const float new_state =
131
+ input_gate * new_input +
132
+ forget_gate *
133
+ prev_state_data[Offset(prev_state_shape, b, h, w, c)];
134
+ output_state_data[Offset(output_state_shape, b, h, w, c)] = new_state;
135
+ output_activ_data[Offset(output_activ_shape, b, h, w, c)] =
136
+ output_gate * std::tanh(new_state);
137
+ }
138
+ }
139
+ }
140
+ }
141
+ }
142
+
143
+ // Quantized LSTM cell implementation.
144
+ // The quantization of the input, output arrays is as follows:
145
+ // - The input activations are quantized as uint8 on the interval
146
+ // [-1, 127/128].
147
+ // The rationale for that is that it is the natural interval for output
148
+ // activations (see next point) and these need to be concatenated together.
149
+ // We could accommodate different ranges by re-scaling, but we empirically
150
+ // found that setting the input activations range to be [-1, 127/128] in the
151
+ // first place, removing the need for re-scaling, greatly improves accuracy.
152
+ // - The output activations are quantized as uint8 on the interval
153
+ // [-1, 127/128].
154
+ // The rationale for that is that the definition of a LSTM cell makes them
155
+ // intrinsically constrained in [-1, 1]; tweaking that to [-1, 127/128]
156
+ // makes for simpler, more accurate fixed-point arithmetic.
157
+ // - The output-at-previous-timestep state array is obviously quantized as
158
+ // the output activations.
159
+ // - The internal LSTM memory (not the output-at-previous-timestep, the other
160
+ // internal state array) is int16-quantized and may use any power-of-two,
161
+ // symmetric range i.e. [-2^N, 2^N * 32767/32768] for any N, which we call
162
+ // StateIntegerBits below, see the below discussion of that template
163
+ // parameter ("The StateIntegerBits template parameter").
164
+ // - The output of the internal fully-connected node is int16-quantized
165
+ // on the interval [-8, 8 * 32767/32768], the rationale for which is
166
+ // explained just below ("Why [-8, 8] for fully-connected output?").
167
+ //
168
+ //
169
+ // === The StateIntegerBits template parameter ===
170
+ //
171
+ // The StateIntegerBits template parameter controls the fixed-point format used
172
+ // to represent the internal memory of the LSTM cell (not the
173
+ // output-at-previous-timestep, the other internal state array). It's currently
174
+ // a template parameter so that the model can control that. The most typical
175
+ // value for StateIntegerBits is 4. Other plausible values are anywhere between
176
+ // 3 and 5. We might eventually standardize on a single supported value, e.g. 4,
177
+ // and drop that template parameter. The reason why it can't be a runtime
178
+ // parameter is that this controls the fixed-point format used, i.e. we need to
179
+ // generate actually different code based on it. In particular, we generate code
180
+ // for a fixed-point tanh() implementation for that format, which internally
181
+ // uses a fixed-point exp() implementation, which internally uses a
182
+ // barrel-shifter with a number of steps that depends on StateIntegerBits.
183
+ // Another consequence of that is that a higher value of StateIntegerBits
184
+ // results in a more expensive implementation (more barrel shifter steps
185
+ // needed).
186
+ //
187
+ //
188
+ // === Why [-8, 8] for fully-connected output? ===
189
+ //
190
+ // This array is only fed to Logistic and Tanh functions, for which
191
+ // the quantized implementation will want to use fixed-point arithmetic,
192
+ // requiring a power-of-two representation interval. Thus, we should right
193
+ // away quantize this array to a power-of-two interval; otherwise,
194
+ // implementation will need to rescale that, losing any benefit that a tighter
195
+ // representation interval might otherwise yield, while introducing some
196
+ // numerical error and computational overhead.
197
+ //
198
+ // Now, Logistic and Tanh
199
+ // are nearly constant (nearly equal to their horizontal asymptotes)
200
+ // outside of a small bounded interval around 0:
201
+ //
202
+ // Logistic(4) = 1 - 1.8e-2 Tanh(4) = 1 - 6.7e-4
203
+ // Logistic(8) = 1 - 3.4e-4 Tanh(8) = 1 - 2.3e-7
204
+ // Logistic(16) = 1 - 1.1e-7 Tanh(16) = 1 - 2.5e-14
205
+ //
206
+ // From this, we see that clamping to [-4, 4] would be too inaccurate
207
+ // (the error of 1.8e-2 on Logistic would be felt even in 8bit precision)
208
+ // while clamping to [-16, 16] would make no difference even in float32.
209
+ // However, for a fixed-point implementation in 16-bit integers, using 5
210
+ // integer bits to represent the [-16, 16] range would leave only 11
211
+ // fractional bits, giving an increment of 2^-11 = 4.9e-4 between consecutive
212
+ // representable values. Notice that is higher than the
213
+ // worst-case clamping error with clamping to [-8, 8]: 3.4e-4 for Logistic.
214
+ // Using [-8, 8] thus seems like the better compromise overall, enjoying
215
+ // an increment of 2.4e-4 between representable values and a worst-case
216
+ // clamping error of 3.4e-4, both better than the increment of 4.9e-4 with
217
+ // [-16, 16].
218
+ //
219
+ // Moreover, all other things being equal, it is nice to choose the narrower
220
+ // representation range, as that makes the implementation of fixed-point
221
+ // math functions a little cheaper (each integer bit requires an additional
222
+ // barrel-shifter step in the implementation of exp(-x)). That is further
223
+ // reason to prefer [-8, 8] over [-16, 16]. The choice of [-16, 16] would make
224
+ // sense for 32-bit float or 32-bit fixed-point quantization, but we are
225
+ // aiming for 16-bit fixed-point quantization of these internal nodes here.
226
+ //
227
+ template <int StateIntegerBits>
228
+ inline void LstmCell(const LstmCellParams& params,
229
+ const RuntimeShape& unextended_input_shape,
230
+ const uint8_t* input_data_uint8,
231
+ const RuntimeShape& unextended_prev_activ_shape,
232
+ const uint8_t* prev_activ_data_uint8,
233
+ const RuntimeShape& weights_shape,
234
+ const uint8_t* weights_data_uint8,
235
+ const RuntimeShape& unextended_bias_shape,
236
+ const int32_t* bias_data_int32,
237
+ const RuntimeShape& unextended_prev_state_shape,
238
+ const int16_t* prev_state_data_int16,
239
+ const RuntimeShape& unextended_output_state_shape,
240
+ int16_t* output_state_data_int16,
241
+ const RuntimeShape& unextended_output_activ_shape,
242
+ uint8_t* output_activ_data_uint8,
243
+ const RuntimeShape& unextended_concat_temp_shape,
244
+ uint8_t* concat_temp_data_uint8,
245
+ const RuntimeShape& unextended_activ_temp_shape,
246
+ int16_t* activ_temp_data_int16, void* gemmlowp_context) {
247
+ (void)gemmlowp_context; // only used in optimized code.
248
+ int32_t weights_zero_point = params.weights_zero_point;
249
+ int32_t accum_multiplier = params.accum_multiplier;
250
+ int accum_shift = params.accum_shift;
251
+ TFLITE_DCHECK_LE(unextended_input_shape.DimensionsCount(), 4);
252
+ TFLITE_DCHECK_LE(unextended_prev_activ_shape.DimensionsCount(), 4);
253
+ TFLITE_DCHECK_LE(unextended_bias_shape.DimensionsCount(), 4);
254
+ TFLITE_DCHECK_LE(unextended_prev_state_shape.DimensionsCount(), 4);
255
+ TFLITE_DCHECK_LE(unextended_output_state_shape.DimensionsCount(), 4);
256
+ TFLITE_DCHECK_LE(unextended_output_activ_shape.DimensionsCount(), 4);
257
+ TFLITE_DCHECK_LE(unextended_concat_temp_shape.DimensionsCount(), 4);
258
+ TFLITE_DCHECK_LE(unextended_activ_temp_shape.DimensionsCount(), 4);
259
+ const RuntimeShape input_shape =
260
+ RuntimeShape::ExtendedShape(4, unextended_input_shape);
261
+ const RuntimeShape prev_activ_shape =
262
+ RuntimeShape::ExtendedShape(4, unextended_prev_activ_shape);
263
+ const RuntimeShape bias_shape =
264
+ RuntimeShape::ExtendedShape(4, unextended_bias_shape);
265
+ const RuntimeShape prev_state_shape =
266
+ RuntimeShape::ExtendedShape(4, unextended_prev_state_shape);
267
+ const RuntimeShape output_state_shape =
268
+ RuntimeShape::ExtendedShape(4, unextended_output_state_shape);
269
+ const RuntimeShape output_activ_shape =
270
+ RuntimeShape::ExtendedShape(4, unextended_output_activ_shape);
271
+ const RuntimeShape concat_temp_shape =
272
+ RuntimeShape::ExtendedShape(4, unextended_concat_temp_shape);
273
+ const RuntimeShape activ_temp_shape =
274
+ RuntimeShape::ExtendedShape(4, unextended_activ_temp_shape);
275
+ TFLITE_DCHECK_GE(weights_shape.DimensionsCount(), 2);
276
+
277
+ // Gather dimensions information, and perform consistency checks.
278
+ const int weights_dim_count = weights_shape.DimensionsCount();
279
+ const int outer_size = MatchingFlatSizeSkipDim(
280
+ input_shape, 3, prev_activ_shape, prev_state_shape, output_state_shape,
281
+ output_activ_shape);
282
+ const int input_depth = input_shape.Dims(3);
283
+ const int prev_activ_depth = prev_activ_shape.Dims(3);
284
+ const int total_input_depth = prev_activ_depth + input_depth;
285
+ TFLITE_DCHECK_EQ(weights_shape.Dims(weights_dim_count - 1),
286
+ total_input_depth);
287
+ const int intern_activ_depth =
288
+ MatchingDim(weights_shape, weights_dim_count - 2, bias_shape, 3);
289
+ TFLITE_DCHECK_EQ(weights_shape.FlatSize(),
290
+ intern_activ_depth * total_input_depth);
291
+ TFLITE_DCHECK_EQ(FlatSizeSkipDim(bias_shape, 3), 1);
292
+ TFLITE_DCHECK_EQ(intern_activ_depth % 4, 0);
293
+ const int output_depth =
294
+ MatchingDim(prev_state_shape, 3, prev_activ_shape, 3, output_state_shape,
295
+ 3, output_activ_shape, 3);
296
+ TFLITE_DCHECK_EQ(output_depth, intern_activ_depth / 4);
297
+ const int fc_batches = FlatSizeSkipDim(activ_temp_shape, 3);
298
+ const int fc_output_depth =
299
+ MatchingDim(weights_shape, weights_dim_count - 2, activ_temp_shape, 3);
300
+ const int fc_accum_depth = total_input_depth;
301
+ TFLITE_DCHECK_EQ(fc_output_depth, 4 * output_depth);
302
+
303
+ // Depth-concatenate prev_activ and input data together.
304
+ uint8_t const* concat_input_arrays_data[2] = {input_data_uint8,
305
+ prev_activ_data_uint8};
306
+ const RuntimeShape* concat_input_arrays_shapes[2] = {&input_shape,
307
+ &prev_activ_shape};
308
+ tflite_micro::ConcatenationParams concat_params;
309
+ concat_params.axis = 3;
310
+ concat_params.inputs_count = 2;
311
+ Concatenation(concat_params, concat_input_arrays_shapes,
312
+ concat_input_arrays_data, concat_temp_shape,
313
+ concat_temp_data_uint8);
314
+
315
+ // Implementation of the fully connected node inside the LSTM cell.
316
+ // The operands are 8-bit integers, the accumulators are internally 32bit
317
+ // integers, and the output is 16-bit fixed-point with 3 integer bits so
318
+ // the output range is [-2^3, 2^3] == [-8, 8]. The rationale for that
319
+ // is explained in the function comment above.
320
+ for (int b = 0; b < fc_batches; ++b) {
321
+ for (int out_c = 0; out_c < fc_output_depth; ++out_c) {
322
+ // Internal accumulation.
323
+ // Initialize accumulator with the bias-value.
324
+ int32_t accum = bias_data_int32[out_c];
325
+ // Accumulation loop.
326
+ for (int d = 0; d < fc_accum_depth; ++d) {
327
+ int16_t input_val =
328
+ concat_temp_data_uint8[b * fc_accum_depth + d] - 128;
329
+ int16_t weights_val =
330
+ weights_data_uint8[out_c * fc_accum_depth + d] - weights_zero_point;
331
+ accum += input_val * weights_val;
332
+ }
333
+ // Down-scale the final int32 accumulator to the scale used by our
334
+ // (16-bit, using 3 integer bits) fixed-point format. The quantized
335
+ // multiplier and shift here have been pre-computed offline
336
+ // (e.g. by toco).
337
+ accum =
338
+ MultiplyByQuantizedMultiplier(accum, accum_multiplier, accum_shift);
339
+ // Saturate, cast to int16, and store to the temporary activations array.
340
+ accum = std::max(-32768, std::min(32767, accum));
341
+ activ_temp_data_int16[out_c + fc_output_depth * b] = accum;
342
+ }
343
+ }
344
+
345
+ // Rest of the LSTM cell: tanh and logistic math functions, and some adds
346
+ // and muls, all done in 16-bit fixed-point.
347
+ for (int b = 0; b < outer_size; ++b) {
348
+ for (int c = 0; c < output_depth; ++c) {
349
+ // Define the fixed-point data types that we will use here. All use
350
+ // int16 as the underlying integer type i.e. all are 16-bit fixed-point.
351
+ // They only differ by the number of integral vs. fractional bits,
352
+ // determining the range of values that they can represent.
353
+ //
354
+ // F0 uses 0 integer bits, range [-1, 1].
355
+ // This is the return type of math functions such as tanh, logistic,
356
+ // whose range is in [-1, 1].
357
+ using F0 = gemmlowp::FixedPoint<std::int16_t, 0>;
358
+ // F3 uses 3 integer bits, range [-8, 8].
359
+ // This is the range of the previous fully-connected node's output,
360
+ // which is our input here.
361
+ using F3 = gemmlowp::FixedPoint<std::int16_t, 3>;
362
+ // FS uses StateIntegerBits integer bits, range [-2^StateIntegerBits,
363
+ // 2^StateIntegerBits]. It's used to represent the internal state, whose
364
+ // number of integer bits is currently dictated by the model. See comment
365
+ // on the StateIntegerBits template parameter above.
366
+ using FS = gemmlowp::FixedPoint<std::int16_t, StateIntegerBits>;
367
+ // Implementation of input gate, using fixed-point logistic function.
368
+ F3 input_gate_input = F3::FromRaw(
369
+ activ_temp_data_int16[b * fc_output_depth + 0 * output_depth + c]);
370
+ F0 input_gate_output = gemmlowp::logistic(input_gate_input);
371
+ // Implementation of input modulation gate, using fixed-point tanh
372
+ // function.
373
+ F3 input_modulation_gate_input = F3::FromRaw(
374
+ activ_temp_data_int16[b * fc_output_depth + 1 * output_depth + c]);
375
+ F0 input_modulation_gate_output =
376
+ gemmlowp::tanh(input_modulation_gate_input);
377
+ // Implementation of forget gate, using fixed-point logistic function.
378
+ F3 forget_gate_input = F3::FromRaw(
379
+ activ_temp_data_int16[b * fc_output_depth + 2 * output_depth + c]);
380
+ F0 forget_gate_output = gemmlowp::logistic(forget_gate_input);
381
+ // Implementation of output gate, using fixed-point logistic function.
382
+ F3 output_gate_input = F3::FromRaw(
383
+ activ_temp_data_int16[b * fc_output_depth + 3 * output_depth + c]);
384
+ F0 output_gate_output = gemmlowp::logistic(output_gate_input);
385
+ // Implementation of internal multiplication nodes, still in fixed-point.
386
+ F0 input_times_input_modulation =
387
+ input_gate_output * input_modulation_gate_output;
388
+ FS prev_state = FS::FromRaw(prev_state_data_int16[b * output_depth + c]);
389
+ FS prev_state_times_forget_state = forget_gate_output * prev_state;
390
+ // Implementation of internal addition node, saturating.
391
+ FS new_state = gemmlowp::SaturatingAdd(
392
+ gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
393
+ prev_state_times_forget_state);
394
+ // Implementation of last internal Tanh node, still in fixed-point.
395
+ // Since a Tanh fixed-point implementation is specialized for a given
396
+ // number of integer bits, and each specialization can have a substantial
397
+ // code size, and we already used above a Tanh on an input with 3 integer
398
+ // bits, and per the table in the above function comment there is no
399
+ // significant accuracy to be lost by clamping to [-8, +8] for a
400
+ // 3-integer-bits representation, let us just do that. This helps people
401
+ // porting this to targets where code footprint must be minimized.
402
+ F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
403
+ F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
404
+ // Store the new internal state back to memory, as 16-bit integers.
405
+ // Note: here we store the original value with StateIntegerBits, not
406
+ // the rescaled 3-integer-bits value fed to tanh.
407
+ output_state_data_int16[b * output_depth + c] = new_state.raw();
408
+ // Down-scale the output activations to 8-bit integers, saturating,
409
+ // and store back to memory as uint8 with a +128 offset.
410
+ int16_t rescaled_output_activ =
411
+ gemmlowp::RoundingDivideByPOT(output_activ_int16.raw(), 8);
412
+ int16_t clamped_output_activ = std::max<int16_t>(
413
+ -128, std::min<int16_t>(127, rescaled_output_activ));
414
+ output_activ_data_uint8[b * output_depth + c] =
415
+ 128 + clamped_output_activ;
416
+ }
417
+ }
418
+ }
419
+
420
+ } // namespace reference_ops
421
+ } // namespace tflite_micro
422
+ #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_LSTM_CELL_H_
@@ -0,0 +1,64 @@
1
+ /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+ ==============================================================================*/
15
+ #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
16
+ #define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_
17
+
18
+ #include "tensorflow/lite/kernels/internal/common.h"
19
+ #include "tensorflow/lite/kernels/internal/types.h"
20
+
21
+ namespace tflite_micro {
22
+ namespace reference_ops {
23
+
24
+ template <typename T, typename Op, int N = 5>
25
+ void MaximumMinimumBroadcastSlow(const RuntimeShape& unextended_input1_shape,
26
+ const T* input1_data,
27
+ const RuntimeShape& unextended_input2_shape,
28
+ const T* input2_data,
29
+ const RuntimeShape& unextended_output_shape,
30
+ T* output_data, Op op) {
31
+ // No broadcast is needed when the input shapes match: apply `op` over the flat buffers.
32
+ if (unextended_input1_shape == unextended_input2_shape) {
33
+ const int element_count =
34
+ MatchingElementsSize(unextended_input1_shape, unextended_input2_shape,
35
+ unextended_output_shape);
36
+ for (int pos = 0; pos < element_count; ++pos) {
37
+ output_data[pos] = op(input1_data[pos], input2_data[pos]);
38
+ }
39
+ return;
40
+ }
41
+ TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), N);
42
+ TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), N);
43
+ TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), N);
44
+
45
+ NdArrayDesc<N> lhs_desc;
46
+ NdArrayDesc<N> rhs_desc;
47
+ NdArrayDesc<N> result_desc;
48
+ NdArrayDescsForElementwiseBroadcast(
49
+ unextended_input1_shape, unextended_input2_shape, &lhs_desc, &rhs_desc);
50
+ CopyDimsToDesc(RuntimeShape::ExtendedShape(N, unextended_output_shape),
51
+ &result_desc);
52
+
53
+ auto apply_at = [&](int indexes[N]) {
54
+ const T& lhs = input1_data[SubscriptToIndex(lhs_desc, indexes)];
55
+ const T& rhs = input2_data[SubscriptToIndex(rhs_desc, indexes)];
56
+ output_data[SubscriptToIndex(result_desc, indexes)] = op(lhs, rhs);
57
+ };
58
+ NDOpsHelper<N>(result_desc, apply_at);
59
+ }
60
+
61
+ } // namespace reference_ops
62
+ } // namespace tflite_micro
63
+
64
+ #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_MAXIMUM_MINIMUM_H_