tf-nightly-cpu 2.20.0.dev20250220-cp311-cp311-win_amd64.whl → 2.20.0.dev20250222-cp311-cp311-win_amd64.whl

Files changed (130)
  1. tensorflow/_api/v2/compat/v1/summary/__init__.py +2 -2
  2. tensorflow/_api/v2/compat/v1/tpu/experimental/embedding/__init__.py +2 -2
  3. tensorflow/_api/v2/compat/v2/summary/__init__.py +10 -10
  4. tensorflow/_api/v2/compat/v2/summary/experimental/__init__.py +4 -4
  5. tensorflow/_api/v2/compat/v2/tpu/experimental/embedding/__init__.py +2 -2
  6. tensorflow/_api/v2/summary/__init__.py +10 -10
  7. tensorflow/_api/v2/summary/experimental/__init__.py +4 -4
  8. tensorflow/_api/v2/tpu/experimental/embedding/__init__.py +2 -2
  9. tensorflow/compiler/mlir/stablehlo/stablehlo_extension.pyd +0 -0
  10. tensorflow/compiler/tf2tensorrt/_pywrap_py_utils.pyd +0 -0
  11. tensorflow/compiler/tf2xla/ops/_xla_ops.so +0 -0
  12. tensorflow/include/external/llvm-project/mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h +12 -0
  13. tensorflow/include/external/llvm-project/mlir/include/mlir/Dialect/Math/IR/MathOps.h.inc +4 -0
  14. tensorflow/include/external/shardy/shardy/dialect/sdy/transforms/propagation/aggressive_factor_propagation.h +9 -0
  15. tensorflow/include/external/stablehlo/_virtual_includes/stablehlo_pass_utils/stablehlo/transforms/PassUtils.h +7 -0
  16. tensorflow/include/external/stablehlo/_virtual_includes/stablehlo_passes/stablehlo/transforms/PassUtils.h +7 -0
  17. tensorflow/include/external/stablehlo/_virtual_includes/version/stablehlo/dialect/Version.h +1 -1
  18. tensorflow/include/external/stablehlo/stablehlo/dialect/Version.h +1 -1
  19. tensorflow/include/external/stablehlo/stablehlo/transforms/PassUtils.h +7 -0
  20. tensorflow/include/tensorflow/compiler/xla/backends/cpu/codegen/kernel_api_ir_builder.h +3 -2
  21. tensorflow/include/tensorflow/compiler/xla/backends/cpu/runtime/convolution_thunk_internal.h +8 -10
  22. tensorflow/include/tensorflow/compiler/xla/backends/cpu/runtime/kernel_thunk.h +9 -3
  23. tensorflow/include/tensorflow/compiler/xla/backends/cpu/runtime/work_queue.h +81 -19
  24. tensorflow/include/tensorflow/compiler/xla/codegen/kernel_spec.h +24 -7
  25. tensorflow/include/tensorflow/compiler/xla/hlo/ir/hlo_casting_utils.h +0 -44
  26. tensorflow/include/tensorflow/compiler/xla/hlo/ir/hlo_instruction.h +12 -0
  27. tensorflow/include/tensorflow/compiler/xla/mlir_hlo/_virtual_includes/stablehlo_extension_pass_inc_gen/stablehlo_ext/transforms/passes.h.inc +149 -4
  28. tensorflow/include/tensorflow/compiler/xla/mlir_hlo/stablehlo_ext/transforms/passes.h.inc +149 -4
  29. tensorflow/include/tensorflow/compiler/xla/pjrt/distributed/client.h +5 -0
  30. tensorflow/include/tensorflow/compiler/xla/pjrt/gpu/se_gpu_pjrt_client.h +1 -92
  31. tensorflow/include/tensorflow/compiler/xla/pjrt/gpu/se_gpu_topology_description.h +126 -0
  32. tensorflow/include/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_client.h +1 -49
  33. tensorflow/include/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_device_description.h +75 -0
  34. tensorflow/include/tensorflow/compiler/xla/pjrt/plugin/xla_cpu/cpu_execute_options.h +57 -0
  35. tensorflow/include/tensorflow/compiler/xla/pjrt/plugin/xla_cpu/cpu_topology.h +4 -0
  36. tensorflow/include/tensorflow/compiler/xla/service/constant_value.h +1 -0
  37. tensorflow/include/tensorflow/compiler/xla/service/hlo_module_util.h +52 -1
  38. tensorflow/include/tensorflow/compiler/xla/service/hlo_proto_util.h +0 -12
  39. tensorflow/include/tensorflow/compiler/xla/tsl/concurrency/async_value.h +50 -21
  40. tensorflow/include/tensorflow/compiler/xla/tsl/framework/convolution/eigen_spatial_convolutions-inl.h +5 -5
  41. tensorflow/include/tensorflow/core/kernels/data/experimental/random_access_ops.h +0 -2
  42. tensorflow/include/tensorflow/core/kernels/eigen_attention.h +4 -4
  43. tensorflow/include/tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h +6 -6
  44. tensorflow/include/tensorflow/core/kernels/eigen_backward_spatial_convolutions.h +10 -8
  45. tensorflow/include/tensorflow/core/kernels/eigen_cuboid_convolution.h +6 -6
  46. tensorflow/include/tensorflow/core/kernels/eigen_pooling.h +12 -12
  47. tensorflow/include/tensorflow/core/public/release_version.h +39 -0
  48. tensorflow/include/tensorflow/core/public/version.h +112 -127
  49. tensorflow/include/tensorflow/python/eager/pywrap_tfe.h +1 -1
  50. tensorflow/include/xla/backends/cpu/codegen/kernel_api_ir_builder.h +3 -2
  51. tensorflow/include/xla/backends/cpu/runtime/convolution_thunk_internal.h +8 -10
  52. tensorflow/include/xla/backends/cpu/runtime/kernel_thunk.h +9 -3
  53. tensorflow/include/xla/backends/cpu/runtime/work_queue.h +81 -19
  54. tensorflow/include/xla/codegen/kernel_spec.h +24 -7
  55. tensorflow/include/xla/hlo/ir/hlo_casting_utils.h +0 -44
  56. tensorflow/include/xla/hlo/ir/hlo_instruction.h +12 -0
  57. tensorflow/include/xla/mlir_hlo/_virtual_includes/stablehlo_extension_pass_inc_gen/stablehlo_ext/transforms/passes.h.inc +149 -4
  58. tensorflow/include/xla/mlir_hlo/stablehlo_ext/transforms/passes.h.inc +149 -4
  59. tensorflow/include/xla/pjrt/distributed/client.h +5 -0
  60. tensorflow/include/xla/pjrt/gpu/se_gpu_pjrt_client.h +1 -92
  61. tensorflow/include/xla/pjrt/gpu/se_gpu_topology_description.h +126 -0
  62. tensorflow/include/xla/pjrt/pjrt_stream_executor_client.h +1 -49
  63. tensorflow/include/xla/pjrt/pjrt_stream_executor_device_description.h +75 -0
  64. tensorflow/include/xla/pjrt/plugin/xla_cpu/cpu_execute_options.h +57 -0
  65. tensorflow/include/xla/pjrt/plugin/xla_cpu/cpu_topology.h +4 -0
  66. tensorflow/include/xla/service/constant_value.h +1 -0
  67. tensorflow/include/xla/service/hlo_module_util.h +52 -1
  68. tensorflow/include/xla/service/hlo_proto_util.h +0 -12
  69. tensorflow/include/xla/tsl/concurrency/async_value.h +50 -21
  70. tensorflow/include/xla/tsl/framework/convolution/eigen_spatial_convolutions-inl.h +5 -5
  71. tensorflow/lite/experimental/microfrontend/python/ops/_audio_microfrontend_op.so +0 -0
  72. tensorflow/lite/python/analyzer_wrapper/_pywrap_analyzer_wrapper.pyd +0 -0
  73. tensorflow/lite/python/interpreter_wrapper/_pywrap_tensorflow_interpreter_wrapper.pyd +0 -0
  74. tensorflow/lite/python/optimize/_pywrap_tensorflow_lite_calibration_wrapper.pyd +0 -0
  75. tensorflow/python/_pywrap_dtensor_device.pyd +0 -0
  76. tensorflow/python/_pywrap_mlir.pyd +0 -0
  77. tensorflow/python/_pywrap_parallel_device.pyd +0 -0
  78. tensorflow/python/_pywrap_quantize_training.pyd +0 -0
  79. tensorflow/python/_pywrap_tensorflow_internal.pyd +0 -0
  80. tensorflow/python/_pywrap_tfcompile.pyd +0 -0
  81. tensorflow/python/_pywrap_tfe.pyd +0 -0
  82. tensorflow/python/client/_pywrap_debug_events_writer.pyd +0 -0
  83. tensorflow/python/client/_pywrap_device_lib.pyd +0 -0
  84. tensorflow/python/client/_pywrap_events_writer.pyd +0 -0
  85. tensorflow/python/client/_pywrap_tf_session.pyd +0 -0
  86. tensorflow/python/compat/compat.py +1 -1
  87. tensorflow/python/data/experimental/service/_pywrap_server_lib.pyd +0 -0
  88. tensorflow/python/data/experimental/service/_pywrap_utils_exp.pyd +0 -0
  89. tensorflow/python/eager/imperative_grad.py +5 -5
  90. tensorflow/python/eager/polymorphic_function/atomic_function.py +1 -1
  91. tensorflow/python/eager/polymorphic_function/compiler_ir.py +1 -1
  92. tensorflow/python/eager/polymorphic_function/polymorphic_function.py +45 -41
  93. tensorflow/python/eager/tape.py +2 -2
  94. tensorflow/python/framework/_dtypes.pyd +0 -0
  95. tensorflow/python/framework/_op_def_library_pybind.pyd +0 -0
  96. tensorflow/python/framework/_op_def_registry.pyd +0 -0
  97. tensorflow/python/framework/_proto_comparators.pyd +0 -0
  98. tensorflow/python/framework/_pywrap_python_op_gen.pyd +0 -0
  99. tensorflow/python/framework/_test_metrics_util.pyd +0 -0
  100. tensorflow/python/grappler/_pywrap_tf_cluster.pyd +0 -0
  101. tensorflow/python/grappler/_pywrap_tf_item.pyd +0 -0
  102. tensorflow/python/grappler/_pywrap_tf_optimizer.pyd +0 -0
  103. tensorflow/python/lib/core/_pywrap_py_func.pyd +0 -0
  104. tensorflow/python/lib/io/_pywrap_file_io.pyd +0 -0
  105. tensorflow/python/lib/io/_pywrap_record_io.pyd +0 -0
  106. tensorflow/python/ops/summary_ops_v2.py +5 -1
  107. tensorflow/python/platform/_pywrap_tf2.pyd +0 -0
  108. tensorflow/python/profiler/internal/_pywrap_profiler.pyd +0 -0
  109. tensorflow/python/profiler/internal/_pywrap_profiler_plugin.pyd +0 -0
  110. tensorflow/python/saved_model/pywrap_saved_model.pyd +0 -0
  111. tensorflow/python/tpu/_pywrap_sparse_core_layout.pyd +0 -0
  112. tensorflow/python/tpu/_pywrap_tpu_embedding.pyd +0 -0
  113. tensorflow/python/tpu/tpu_embedding_v3.py +14 -7
  114. tensorflow/python/tpu/tpu_embedding_v3_checkpoint_adapter.py +10 -1
  115. tensorflow/python/util/_pywrap_checkpoint_reader.pyd +0 -0
  116. tensorflow/python/util/_pywrap_kernel_registry.pyd +0 -0
  117. tensorflow/python/util/_pywrap_stat_summarizer.pyd +0 -0
  118. tensorflow/python/util/_pywrap_tfprof.pyd +0 -0
  119. tensorflow/python/util/_pywrap_transform_graph.pyd +0 -0
  120. tensorflow/python/util/_pywrap_utils.pyd +0 -0
  121. tensorflow/python/util/_tf_stack.pyd +0 -0
  122. tensorflow/tools/pip_package/setup.py +2 -2
  123. tensorflow/xla_aot_runtime_src/xla/tsl/concurrency/async_value.cc +26 -51
  124. {tf_nightly_cpu-2.20.0.dev20250220.dist-info → tf_nightly_cpu-2.20.0.dev20250222.dist-info}/METADATA +1 -1
  125. {tf_nightly_cpu-2.20.0.dev20250220.dist-info → tf_nightly_cpu-2.20.0.dev20250222.dist-info}/RECORD +128 -123
  126. tensorflow/include/tensorflow/compiler/xla/backends/cpu/runtime/concurrency.h +0 -77
  127. tensorflow/include/xla/backends/cpu/runtime/concurrency.h +0 -77
  128. {tf_nightly_cpu-2.20.0.dev20250220.dist-info → tf_nightly_cpu-2.20.0.dev20250222.dist-info}/WHEEL +0 -0
  129. {tf_nightly_cpu-2.20.0.dev20250220.dist-info → tf_nightly_cpu-2.20.0.dev20250222.dist-info}/entry_points.txt +0 -0
  130. {tf_nightly_cpu-2.20.0.dev20250220.dist-info → tf_nightly_cpu-2.20.0.dev20250222.dist-info}/top_level.txt +0 -0
@@ -100,11 +100,12 @@ SpatialConvolutionBackwardInput(
     const DenseIndex row_in_stride = 1, const DenseIndex col_in_stride = 1) {
   typedef typename internal::traits<OutputBackward>::Index TensorIndex;
   typedef typename internal::traits<OutputBackward>::Scalar OutScalar;
-  TensorRef<Tensor<typename internal::traits<Kernel>::Scalar,
-                   internal::traits<Kernel>::NumDimensions,
-                   internal::traits<Kernel>::Layout, TensorIndex>>
+  TensorRef<const Tensor<typename internal::traits<Kernel>::Scalar,
+                         internal::traits<Kernel>::NumDimensions,
+                         internal::traits<Kernel>::Layout, TensorIndex>>
       kern(kernel);
-  TensorRef<Tensor<OutScalar, internal::traits<OutputBackward>::NumDimensions,
+  TensorRef<
+      const Tensor<OutScalar, internal::traits<OutputBackward>::NumDimensions,
                    internal::traits<OutputBackward>::Layout, TensorIndex>>
       out(output_backward);
 
@@ -385,11 +386,12 @@ SpatialConvolutionBackwardKernel(
     const DenseIndex row_in_stride = 1, const DenseIndex col_in_stride = 1) {
   typedef typename internal::traits<Input>::Index TensorIndex;
   typedef typename internal::traits<OutputBackward>::Scalar OutScalar;
-  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
-                   internal::traits<Input>::NumDimensions,
-                   internal::traits<Input>::Layout, TensorIndex>>
+  TensorRef<const Tensor<typename internal::traits<Input>::Scalar,
+                         internal::traits<Input>::NumDimensions,
+                         internal::traits<Input>::Layout, TensorIndex>>
       in(input);
-  TensorRef<Tensor<OutScalar, internal::traits<OutputBackward>::NumDimensions,
+  TensorRef<
+      const Tensor<OutScalar, internal::traits<OutputBackward>::NumDimensions,
                    internal::traits<OutputBackward>::Layout, TensorIndex>>
       out(output_backward);
 
@@ -1843,13 +1843,13 @@ CuboidConvolution(const Input& input, const Kernel& kernel,
     const Index strideCols = 1,
     const PaddingType padding_type = PADDING_SAME) {
   typedef typename internal::traits<Input>::Index TensorIndex;
-  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
-                   internal::traits<Input>::NumDimensions,
-                   internal::traits<Input>::Layout, TensorIndex> >
+  TensorRef<const Tensor<typename internal::traits<Input>::Scalar,
+                         internal::traits<Input>::NumDimensions,
+                         internal::traits<Input>::Layout, TensorIndex> >
       in(input);
-  TensorRef<Tensor<typename internal::traits<Kernel>::Scalar,
-                   internal::traits<Kernel>::NumDimensions,
-                   internal::traits<Kernel>::Layout, TensorIndex> >
+  TensorRef<const Tensor<typename internal::traits<Kernel>::Scalar,
+                         internal::traits<Kernel>::NumDimensions,
+                         internal::traits<Kernel>::Layout, TensorIndex> >
       kern(kernel);
 
   EIGEN_STATIC_ASSERT(
@@ -55,9 +55,9 @@ SpatialMaxPooling(const Input& input, DenseIndex patchRows,
                       YOU_MADE_A_PROGRAMMING_MISTAKE);
 
   typedef typename internal::traits<Input>::Index TensorIndex;
-  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
-                   internal::traits<Input>::NumDimensions,
-                   internal::traits<Input>::Layout, TensorIndex> >
+  TensorRef<const Tensor<typename internal::traits<Input>::Scalar,
+                         internal::traits<Input>::NumDimensions,
+                         internal::traits<Input>::Layout, TensorIndex>>
       in(input);
 
   const DenseIndex patchRowsEff =
@@ -148,9 +148,9 @@ CuboidMaxPooling(const Input& input, DenseIndex patchPlanes,
   static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
 
   typedef typename internal::traits<Input>::Index TensorIndex;
-  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
-                   internal::traits<Input>::NumDimensions,
-                   internal::traits<Input>::Layout, TensorIndex> >
+  TensorRef<const Tensor<typename internal::traits<Input>::Scalar,
+                         internal::traits<Input>::NumDimensions,
+                         internal::traits<Input>::Layout, TensorIndex>>
       in(input);
 
   static const int idxPlanes = isColMajor ? 1 : 3;
@@ -383,9 +383,9 @@ SpatialAvgPooling(const Input& input, DenseIndex patchRows,
                       YOU_MADE_A_PROGRAMMING_MISTAKE);
 
   typedef typename internal::traits<Input>::Index TensorIndex;
-  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
-                   internal::traits<Input>::NumDimensions,
-                   internal::traits<Input>::Layout, TensorIndex> >
+  TensorRef<const Tensor<typename internal::traits<Input>::Scalar,
+                         internal::traits<Input>::NumDimensions,
+                         internal::traits<Input>::Layout, TensorIndex>>
       in(input);
 
   const DenseIndex patchRowsEff =
@@ -475,9 +475,9 @@ CuboidAvgPooling(const Input& input, DenseIndex patchPlanes,
   static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
 
   typedef typename internal::traits<Input>::Index TensorIndex;
-  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
-                   internal::traits<Input>::NumDimensions,
-                   internal::traits<Input>::Layout, TensorIndex> >
+  TensorRef<const Tensor<typename internal::traits<Input>::Scalar,
+                         internal::traits<Input>::NumDimensions,
+                         internal::traits<Input>::Layout, TensorIndex>>
       in(input);
 
   static const int idxPlanes = isColMajor ? 1 : 3;
@@ -0,0 +1,39 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_PUBLIC_RELEASE_VERSION_H_
+#define TENSORFLOW_CORE_PUBLIC_RELEASE_VERSION_H_
+
+// TensorFlow uses semantic versioning, see http://semver.org/.
+
+// Also update tensorflow/tensorflow.bzl and
+// tensorflow/tools/pip_package/setup.py
+#define TF_MAJOR_VERSION 2
+#define TF_MINOR_VERSION 20
+#define TF_PATCH_VERSION 0
+
+// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
+// "-beta", "-rc", "-rc.1")
+#define TF_VERSION_SUFFIX "-dev20250222"
+
+#define _TF_STR_HELPER(x) #x
+#define _TF_STR(x) _TF_STR_HELPER(x)
+
+// e.g. "0.5.0" or "0.6.0-alpha".
+#define TF_VERSION_STRING                                                  \
+  (_TF_STR(TF_MAJOR_VERSION) "." _TF_STR(TF_MINOR_VERSION) "." _TF_STR(    \
+      TF_PATCH_VERSION) TF_VERSION_SUFFIX)
+
+#endif  // TENSORFLOW_CORE_PUBLIC_RELEASE_VERSION_H_
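An illustrative note on the new header (not part of the diff): the two-level _TF_STR/_TF_STR_HELPER indirection is the standard C preprocessor stringization idiom, so the version string assembles by adjacent string-literal concatenation:

    // TF_VERSION_STRING
    //   -> ("2" "." "20" "." "0" "-dev20250222")   after macro expansion
    //   -> "2.20.0-dev20250222"                    after concatenation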
@@ -1,127 +1,112 @@
-/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_PUBLIC_VERSION_H_
-#define TENSORFLOW_CORE_PUBLIC_VERSION_H_
-
-// TensorFlow uses semantic versioning, see http://semver.org/.
-
-// Also update tensorflow/tensorflow.bzl and
-// tensorflow/tools/pip_package/setup.py
-#define TF_MAJOR_VERSION 2
-#define TF_MINOR_VERSION 20
-#define TF_PATCH_VERSION 0
-
-// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
-// "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX "-dev20250220"
-
-#define TF_STR_HELPER(x) #x
-#define TF_STR(x) TF_STR_HELPER(x)
-
-// e.g. "0.5.0" or "0.6.0-alpha".
-#define TF_VERSION_STRING                                            \
-  (TF_STR(TF_MAJOR_VERSION) "." TF_STR(TF_MINOR_VERSION) "." TF_STR( \
-      TF_PATCH_VERSION) TF_VERSION_SUFFIX)
-
-// GraphDef compatibility versions (the versions field in graph.proto).
-//
-// Each graph has producer and min_consumer versions, and each
-// consumer has its own version and a min_producer. In addition, graphs can
-// mark specific consumer versions as bad (to prevent bugs from executing).
-// A consumer will execute a graph if the consumer's version is at least the
-// graph's min_consumer, the graph's producer version is at least the consumer's
-// min_producer, and the consumer version isn't specifically disallowed by the
-// graph.
-//
-// By default, newly created graphs have producer version TF_GRAPH_DEF_VERSION
-// min_consumer TF_GRAPH_DEF_MIN_CONSUMER, and no other bad consumer versions.
-//
-// Version history:
-//
-// 0. Graphs created before GraphDef versioning
-// 1. First real version (2dec2015)
-// 2. adjust_contrast only takes float, doesn't perform clamping (11dec2015)
-// 3. Remove TileGrad, since it was equivalent to reduce_sum (30dec2015)
-// 4. When support for this version is removed, we can safely make AttrValue
-//    parsing more strict with respect to empty list values (see
-//    111635679, 7jan2016).
-// 5. Graphs are wholly-validated during Session::Create() (7jan2016).
-// 6. TensorFlow is scalar strict within Google (27jan2016).
-// 7. Remove TopK in favor of TopKV2 (5feb2016).
-// 8. Replace RandomCrop from C++ with pure Python (5feb2016).
-// 9. Deprecate batch_norm_with_global_normalization (16feb2016).
-// 10. Deprecate conv3d_backprop_{filter,input} (10jun2016).
-// 11. Deprecate {batch}_self_adjoint_eig (3aug2016).
-// 12. Graph consumers understand the node_def field of FunctionDef (22aug2016).
-// 13. Deprecate multiple batch linear algebra ops (9sep2016).
-// 14. Deprecate batch_matrix_* ops. (10sep2016).
-// 15. Deprecate batch_fft_* ops. (14sep2016).
-// 16. Deprecate tensor_array (v1) ops in favor of v2 (10nov2016).
-// 17. Deprecate inv (11nov2016).
-// 17. Expose reverse_v2 (10nov2016)
-// 18. Add VariableV2 (30nov2016)
-// 19. Deprecated ops created by models moved out of core SkipGram, NegTrain.
-//     (08dec2016)
-// 20. Catch all version 1.0 changes to Python API generation. SplitV is now
-//     used for tf.split, ReverseV2 is now used by tf.reverse, ConcatV2 is
-//     now used by tf.concat. Graphs use flooring
-//     division and mod semantics. TensorArrayV3. (12dec2016)
-//     Also considered the version for when it is required for reduction
-//     ops' indices to be scalar or vector, and not higher rank.
-//     Some earlier graph def versions allowed this.
-// 21. Dropped FunctionDef.Node support, switched to node_def introduced
-//     in version 12. (11jan2017)
-// 22. Placeholder now can specify and enforce scalar and partial
-//     shapes, particularly when restoring a graph from GraphDef
-//     produced at version 22 or later. (04/10/2016)
-// 23. Remove NonMaxSuppression in favor of NonMaxSuppressionV2.
-// 24. Deprecate lookup ops (v1) ops in favor of v2 (30may2017)
-// 25. Deprecate stack (v1) ops in favor of v2 (2017/6/15).
-// 25. Deprecate RandomPoisson (v1) ops in favor of v2 (2017/10/25).
-// 26. Add a bool 'stripped_default_attrs' to MetaInfoDef indicating
-//     whether default-valued attrs have been stripped from the nodes in the
-//     GraphDef. (7dec2017)
-// 27. Deprecate TensorArray ops v2 in favor of v3 and deprecated io_ops
-//     deprecated in favor of V2 ops. (2018/01/23)
-// 28. Deprecate MatrixExponential op in favor of Python implementation.
-//     (2018/08/21).
-//     (2019/02/15). Added `control_ret` field to FunctionDef proto, and
-//     `control_output` field to OpDef proto.
-// 29. Deprecate StatefulStandardNormal op in favor of StatefulStandardNormalV2.
-//     (2019/03/25).
-//     (2019/04/17). Added `arg_attr` field to FunctionDefProto.
-// 30. (2019/05/09) First date based GraphDef version. GraphDef
-//     versions advance by 1 each day after this point.
-
-#define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
-#define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 2143  // Updated: 2025/2/19
-
-// Checkpoint compatibility versions (the versions field in SavedSliceMeta).
-//
-// The checkpoint versions have the same semantics as GraphDef versions, but the
-// numbering scheme is separate. We have no plans to ever deprecate checkpoint
-// versions, but it's good to have this in place in case we ever need to.
-//
-// Version history:
-//
-// 0. Checkpoints saved before checkpoint versioning.
-// 1. First real version (10feb2015).
-#define TF_CHECKPOINT_VERSION_MIN_PRODUCER 0
-#define TF_CHECKPOINT_VERSION_MIN_CONSUMER 0
-#define TF_CHECKPOINT_VERSION 1
-
-#endif  // TENSORFLOW_CORE_PUBLIC_VERSION_H_
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_PUBLIC_VERSION_H_
+#define TENSORFLOW_CORE_PUBLIC_VERSION_H_
+
+// TensorFlow uses semantic versioning, see http://semver.org/.
+
+#define TF_STR_HELPER(x) #x
+#define TF_STR(x) TF_STR_HELPER(x)
+
+// GraphDef compatibility versions (the versions field in graph.proto).
+//
+// Each graph has producer and min_consumer versions, and each
+// consumer has its own version and a min_producer. In addition, graphs can
+// mark specific consumer versions as bad (to prevent bugs from executing).
+// A consumer will execute a graph if the consumer's version is at least the
+// graph's min_consumer, the graph's producer version is at least the consumer's
+// min_producer, and the consumer version isn't specifically disallowed by the
+// graph.
+//
+// By default, newly created graphs have producer version TF_GRAPH_DEF_VERSION
+// min_consumer TF_GRAPH_DEF_MIN_CONSUMER, and no other bad consumer versions.
+//
+// Version history:
+//
+// 0. Graphs created before GraphDef versioning
+// 1. First real version (2dec2015)
+// 2. adjust_contrast only takes float, doesn't perform clamping (11dec2015)
+// 3. Remove TileGrad, since it was equivalent to reduce_sum (30dec2015)
+// 4. When support for this version is removed, we can safely make AttrValue
+//    parsing more strict with respect to empty list values (see
+//    111635679, 7jan2016).
+// 5. Graphs are wholly-validated during Session::Create() (7jan2016).
+// 6. TensorFlow is scalar strict within Google (27jan2016).
+// 7. Remove TopK in favor of TopKV2 (5feb2016).
+// 8. Replace RandomCrop from C++ with pure Python (5feb2016).
+// 9. Deprecate batch_norm_with_global_normalization (16feb2016).
+// 10. Deprecate conv3d_backprop_{filter,input} (10jun2016).
+// 11. Deprecate {batch}_self_adjoint_eig (3aug2016).
+// 12. Graph consumers understand the node_def field of FunctionDef (22aug2016).
+// 13. Deprecate multiple batch linear algebra ops (9sep2016).
+// 14. Deprecate batch_matrix_* ops. (10sep2016).
+// 15. Deprecate batch_fft_* ops. (14sep2016).
+// 16. Deprecate tensor_array (v1) ops in favor of v2 (10nov2016).
+// 17. Deprecate inv (11nov2016).
+// 17. Expose reverse_v2 (10nov2016)
+// 18. Add VariableV2 (30nov2016)
+// 19. Deprecated ops created by models moved out of core SkipGram, NegTrain.
+//     (08dec2016)
+// 20. Catch all version 1.0 changes to Python API generation. SplitV is now
+//     used for tf.split, ReverseV2 is now used by tf.reverse, ConcatV2 is
+//     now used by tf.concat. Graphs use flooring
+//     division and mod semantics. TensorArrayV3. (12dec2016)
+//     Also considered the version for when it is required for reduction
+//     ops' indices to be scalar or vector, and not higher rank.
+//     Some earlier graph def versions allowed this.
+// 21. Dropped FunctionDef.Node support, switched to node_def introduced
+//     in version 12. (11jan2017)
+// 22. Placeholder now can specify and enforce scalar and partial
+//     shapes, particularly when restoring a graph from GraphDef
+//     produced at version 22 or later. (04/10/2016)
+// 23. Remove NonMaxSuppression in favor of NonMaxSuppressionV2.
+// 24. Deprecate lookup ops (v1) ops in favor of v2 (30may2017)
+// 25. Deprecate stack (v1) ops in favor of v2 (2017/6/15).
+// 25. Deprecate RandomPoisson (v1) ops in favor of v2 (2017/10/25).
+// 26. Add a bool 'stripped_default_attrs' to MetaInfoDef indicating
+//     whether default-valued attrs have been stripped from the nodes in the
+//     GraphDef. (7dec2017)
+// 27. Deprecate TensorArray ops v2 in favor of v3 and deprecated io_ops
+//     deprecated in favor of V2 ops. (2018/01/23)
+// 28. Deprecate MatrixExponential op in favor of Python implementation.
+//     (2018/08/21).
+//     (2019/02/15). Added `control_ret` field to FunctionDef proto, and
+//     `control_output` field to OpDef proto.
+// 29. Deprecate StatefulStandardNormal op in favor of StatefulStandardNormalV2.
+//     (2019/03/25).
+//     (2019/04/17). Added `arg_attr` field to FunctionDefProto.
+// 30. (2019/05/09) First date based GraphDef version. GraphDef
+//     versions advance by 1 each day after this point.
+
+#define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
+#define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
+#define TF_GRAPH_DEF_VERSION 2145  // Updated: 2025/2/21
+
+// Checkpoint compatibility versions (the versions field in SavedSliceMeta).
+//
+// The checkpoint versions have the same semantics as GraphDef versions, but the
+// numbering scheme is separate. We have no plans to ever deprecate checkpoint
+// versions, but it's good to have this in place in case we ever need to.
+//
+// Version history:
+//
+// 0. Checkpoints saved before checkpoint versioning.
+// 1. First real version (10feb2015).
+#define TF_CHECKPOINT_VERSION_MIN_PRODUCER 0
+#define TF_CHECKPOINT_VERSION_MIN_CONSUMER 0
+#define TF_CHECKPOINT_VERSION 1
+
+#endif  // TENSORFLOW_CORE_PUBLIC_VERSION_H_
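The comment block in version.h states the GraphDef acceptance rule in prose. As an illustrative sketch only (the struct mirrors the versions field of graph.proto described above; the helper name is hypothetical, not a TensorFlow API), the rule is:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Mirrors the graph.proto VersionDef fields named in the header comment.
    struct GraphVersions {
      int32_t producer;                    // version of whatever wrote the graph
      int32_t min_consumer;                // oldest consumer allowed to run it
      std::vector<int32_t> bad_consumers;  // consumer versions known to be buggy
    };

    // Hypothetical helper: a consumer may execute a graph iff all three
    // conditions from the header comment hold.
    bool CanExecute(const GraphVersions& graph, int32_t consumer_version,
                    int32_t consumer_min_producer) {
      return consumer_version >= graph.min_consumer &&
             graph.producer >= consumer_min_producer &&
             std::find(graph.bad_consumers.begin(), graph.bad_consumers.end(),
                       consumer_version) == graph.bad_consumers.end();
    }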
@@ -443,7 +443,7 @@ EagerContextThreadLocalData* GetEagerContextThreadLocalData(
 // wish to destroy thread-local state associated with a single py_eager_context
 // for multiple threads, then you must call this method from each thread.
 //
-// Thread-local state assocaited with eager contexts is also automatically
+// Thread-local state associated with eager contexts is also automatically
 // cleaned up when the thread is destroyed.
 //
 // This function assumes that the Python GIL is held (and does not perform its
@@ -89,9 +89,10 @@ class KernelApiIrBuilder {
     // read-only if it is not aliased with any result.
     absl::flat_hash_set<int64_t> invariant_arguments;
 
-    // the set of buffer uses for this kernel, can be empty if buffer
+    // The set of buffers used by this kernel, can be empty if buffer assignment
     // was not provided.
-    absl::InlinedVector<BufferUse, 8> buffer_uses;
+    absl::InlinedVector<BufferAllocation::Slice, 8> argument_buffers;
+    absl::InlinedVector<BufferAllocation::Slice, 8> result_buffers;
   };
 
   KernelApiIrBuilder(llvm::LLVMContext& context, Options options);
@@ -22,7 +22,7 @@ limitations under the License.
 #include <memory>
 #include <utility>
 
-#include "xla/backends/cpu/runtime/concurrency.h"
+#include "xla/backends/cpu/runtime/work_queue.h"
 #include "xla/tsl/concurrency/async_value_ref.h"
 #include "xla/tsl/concurrency/chain.h"
 #include "xla/tsl/framework/convolution/eigen_spatial_convolutions.h"  // IWYU pragma: keep
@@ -30,7 +30,6 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 #include "Eigen/Core"
-#include "Eigen/ThreadPool"
 #include "unsupported/Eigen/CXX11/Tensor"
 
 namespace xla::cpu::internal {
@@ -384,8 +383,9 @@ void EigenGenericConv2D(
   auto num_tasks = Eigen::numext::div_ceil(feature_group_count, task_size);
 
   if (use_thunk_runtime) {
-    ScheduleAll(
-        &device, num_tasks, [=, &device](Eigen::Index task_index) mutable {
+    Worker::Parallelize(
+        &device, /*num_workers=*/num_tasks, num_tasks,
+        [=, &device](Eigen::Index task_index) mutable {
           Eigen::Index start = task_index * task_size;
           Eigen::Index end = std::min(start + task_size, feature_group_count);
           for (Eigen::Index i = start; i < end; ++i) {
@@ -395,18 +395,16 @@ void EigenGenericConv2D(
           }
         });
   } else {
-    Eigen::Barrier barrier(num_tasks);
-    ScheduleAll(
-        &device, num_tasks, [=, &device, &barrier](Eigen::Index task_index) {
+    tsl::BlockUntilReady(Worker::Parallelize(
+        &device, /*num_workers=*/num_tasks, num_tasks,
+        [=, &device](Eigen::Index task_index) {
           Eigen::Index start = task_index * task_size;
           Eigen::Index end = std::min(start + task_size, feature_group_count);
           for (Eigen::Index i = start; i < end; ++i) {
            auto [output, convolved] = convolve_group(i);
            output.device(device) = convolved;
          }
-          barrier.Notify();
-        });
-    barrier.Wait();
+        }));
   }
 
 } else {
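Taken together, the two EigenGenericConv2D hunks replace the ScheduleAll + Eigen::Barrier pattern with Worker::Parallelize from the newly included work_queue.h. Based only on what this diff shows (the exact work_queue.h API is not visible here), the synchronization moves from a hand-rolled barrier to an awaitable completion event:

    // Before: completion tracked manually with a barrier.
    Eigen::Barrier barrier(num_tasks);
    ScheduleAll(&device, num_tasks, [&](Eigen::Index i) {
      DoTask(i);  // hypothetical per-task body
      barrier.Notify();
    });
    barrier.Wait();

    // After: Parallelize returns an async value; callers that must block
    // wrap it in tsl::BlockUntilReady, and async callers simply don't.
    tsl::BlockUntilReady(Worker::Parallelize(
        &device, /*num_workers=*/num_tasks, num_tasks,
        [&](Eigen::Index i) { DoTask(i); }));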
@@ -63,6 +63,8 @@ class KernelThunkBase : public Thunk {
       const = 0;
 
   virtual absl::Span<const BufferAllocation::Slice> results_buffers() const = 0;
+
+  virtual const absl::flat_hash_set<int64_t>& invariant_arguments() const = 0;
 };
 
 namespace internal {
@@ -95,6 +97,10 @@ class KernelThunk : public KernelThunkBase {
     return absl::MakeSpan(results_buffers_);
   }
 
+  const absl::flat_hash_set<int64_t>& invariant_arguments() const final {
+    return invariant_arguments_;
+  }
+
  protected:
   tsl::AsyncValueRef<ExecuteEvent> ExecuteInternal(const ExecuteParams& params);
 
@@ -129,7 +135,7 @@ class KernelThunk : public KernelThunkBase {
   KernelThunk(Info info,
               absl::Span<const BufferAllocation::Slice> arguments_buffers,
               absl::Span<const BufferAllocation::Slice> results_buffers,
-              std::optional<absl::flat_hash_set<int64_t>> invariant_arguments,
+              absl::flat_hash_set<int64_t> invariant_arguments,
               std::string kernel_name, se::ThreadDim thread_dim,
               std::optional<uint64_t> min_alignment);
 
@@ -139,7 +145,7 @@ class KernelThunk : public KernelThunkBase {
   ResultsBuffers results_buffers_;
 
   // A set of invariant arguments (their indices).
-  std::optional<absl::flat_hash_set<int64_t>> invariant_arguments_;
+  absl::flat_hash_set<int64_t> invariant_arguments_;
 
   size_t num_kernel_args_;
 
@@ -189,7 +195,7 @@ class KernelThunk final : public internal::KernelThunk<> {
       absl::Span<const BufferAllocation::Slice> arguments_buffers,
       absl::Span<const BufferAllocation::Slice> results_buffers,
       std::string kernel_name, se::ThreadDim thread_dim,
-      std::optional<absl::flat_hash_set<int64_t>> invariant_arguments,
+      absl::flat_hash_set<int64_t> invariant_arguments,
       std::optional<uint64_t> min_alignment = std::nullopt);
 
   static absl::StatusOr<std::unique_ptr<Thunk>> Create(
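Read together, the kernel_thunk.h hunks make invariant_arguments a plain absl::flat_hash_set<int64_t> (previously wrapped in std::optional) and expose it through the new virtual accessor on KernelThunkBase. A hypothetical call-site sketch of the signature change (argument names taken from the parameter list above; the leading parameters are elided in this diff, so they are not filled in):

    // Before: "no invariant arguments" was spelled std::nullopt at call sites.
    auto thunk = KernelThunk::Create(/*...leading args...*/ arguments_buffers,
                                     results_buffers, kernel_name, thread_dim,
                                     /*invariant_arguments=*/std::nullopt);

    // After: a concrete (possibly empty) set expresses the same thing, and any
    // thunk can be queried uniformly via invariant_arguments().
    auto thunk = KernelThunk::Create(/*...leading args...*/ arguments_buffers,
                                     results_buffers, kernel_name, thread_dim,
                                     /*invariant_arguments=*/{});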