tf-nightly-cpu 2.20.0.dev20250220__cp39-cp39-win_amd64.whl → 2.20.0.dev20250222__cp39-cp39-win_amd64.whl

Files changed (130)
  1. tensorflow/_api/v2/compat/v1/summary/__init__.py +2 -2
  2. tensorflow/_api/v2/compat/v1/tpu/experimental/embedding/__init__.py +2 -2
  3. tensorflow/_api/v2/compat/v2/summary/__init__.py +10 -10
  4. tensorflow/_api/v2/compat/v2/summary/experimental/__init__.py +4 -4
  5. tensorflow/_api/v2/compat/v2/tpu/experimental/embedding/__init__.py +2 -2
  6. tensorflow/_api/v2/summary/__init__.py +10 -10
  7. tensorflow/_api/v2/summary/experimental/__init__.py +4 -4
  8. tensorflow/_api/v2/tpu/experimental/embedding/__init__.py +2 -2
  9. tensorflow/compiler/mlir/stablehlo/stablehlo_extension.pyd +0 -0
  10. tensorflow/compiler/tf2tensorrt/_pywrap_py_utils.pyd +0 -0
  11. tensorflow/compiler/tf2xla/ops/_xla_ops.so +0 -0
  12. tensorflow/include/external/llvm-project/mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h +12 -0
  13. tensorflow/include/external/llvm-project/mlir/include/mlir/Dialect/Math/IR/MathOps.h.inc +4 -0
  14. tensorflow/include/external/shardy/shardy/dialect/sdy/transforms/propagation/aggressive_factor_propagation.h +9 -0
  15. tensorflow/include/external/stablehlo/_virtual_includes/stablehlo_pass_utils/stablehlo/transforms/PassUtils.h +7 -0
  16. tensorflow/include/external/stablehlo/_virtual_includes/stablehlo_passes/stablehlo/transforms/PassUtils.h +7 -0
  17. tensorflow/include/external/stablehlo/_virtual_includes/version/stablehlo/dialect/Version.h +1 -1
  18. tensorflow/include/external/stablehlo/stablehlo/dialect/Version.h +1 -1
  19. tensorflow/include/external/stablehlo/stablehlo/transforms/PassUtils.h +7 -0
  20. tensorflow/include/tensorflow/compiler/xla/backends/cpu/codegen/kernel_api_ir_builder.h +3 -2
  21. tensorflow/include/tensorflow/compiler/xla/backends/cpu/runtime/convolution_thunk_internal.h +8 -10
  22. tensorflow/include/tensorflow/compiler/xla/backends/cpu/runtime/kernel_thunk.h +9 -3
  23. tensorflow/include/tensorflow/compiler/xla/backends/cpu/runtime/work_queue.h +81 -19
  24. tensorflow/include/tensorflow/compiler/xla/codegen/kernel_spec.h +24 -7
  25. tensorflow/include/tensorflow/compiler/xla/hlo/ir/hlo_casting_utils.h +0 -44
  26. tensorflow/include/tensorflow/compiler/xla/hlo/ir/hlo_instruction.h +12 -0
  27. tensorflow/include/tensorflow/compiler/xla/mlir_hlo/_virtual_includes/stablehlo_extension_pass_inc_gen/stablehlo_ext/transforms/passes.h.inc +149 -4
  28. tensorflow/include/tensorflow/compiler/xla/mlir_hlo/stablehlo_ext/transforms/passes.h.inc +149 -4
  29. tensorflow/include/tensorflow/compiler/xla/pjrt/distributed/client.h +5 -0
  30. tensorflow/include/tensorflow/compiler/xla/pjrt/gpu/se_gpu_pjrt_client.h +1 -92
  31. tensorflow/include/tensorflow/compiler/xla/pjrt/gpu/se_gpu_topology_description.h +126 -0
  32. tensorflow/include/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_client.h +1 -49
  33. tensorflow/include/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_device_description.h +75 -0
  34. tensorflow/include/tensorflow/compiler/xla/pjrt/plugin/xla_cpu/cpu_execute_options.h +57 -0
  35. tensorflow/include/tensorflow/compiler/xla/pjrt/plugin/xla_cpu/cpu_topology.h +4 -0
  36. tensorflow/include/tensorflow/compiler/xla/service/constant_value.h +1 -0
  37. tensorflow/include/tensorflow/compiler/xla/service/hlo_module_util.h +52 -1
  38. tensorflow/include/tensorflow/compiler/xla/service/hlo_proto_util.h +0 -12
  39. tensorflow/include/tensorflow/compiler/xla/tsl/concurrency/async_value.h +50 -21
  40. tensorflow/include/tensorflow/compiler/xla/tsl/framework/convolution/eigen_spatial_convolutions-inl.h +5 -5
  41. tensorflow/include/tensorflow/core/kernels/data/experimental/random_access_ops.h +0 -2
  42. tensorflow/include/tensorflow/core/kernels/eigen_attention.h +4 -4
  43. tensorflow/include/tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h +6 -6
  44. tensorflow/include/tensorflow/core/kernels/eigen_backward_spatial_convolutions.h +10 -8
  45. tensorflow/include/tensorflow/core/kernels/eigen_cuboid_convolution.h +6 -6
  46. tensorflow/include/tensorflow/core/kernels/eigen_pooling.h +12 -12
  47. tensorflow/include/tensorflow/core/public/release_version.h +39 -0
  48. tensorflow/include/tensorflow/core/public/version.h +112 -127
  49. tensorflow/include/tensorflow/python/eager/pywrap_tfe.h +1 -1
  50. tensorflow/include/xla/backends/cpu/codegen/kernel_api_ir_builder.h +3 -2
  51. tensorflow/include/xla/backends/cpu/runtime/convolution_thunk_internal.h +8 -10
  52. tensorflow/include/xla/backends/cpu/runtime/kernel_thunk.h +9 -3
  53. tensorflow/include/xla/backends/cpu/runtime/work_queue.h +81 -19
  54. tensorflow/include/xla/codegen/kernel_spec.h +24 -7
  55. tensorflow/include/xla/hlo/ir/hlo_casting_utils.h +0 -44
  56. tensorflow/include/xla/hlo/ir/hlo_instruction.h +12 -0
  57. tensorflow/include/xla/mlir_hlo/_virtual_includes/stablehlo_extension_pass_inc_gen/stablehlo_ext/transforms/passes.h.inc +149 -4
  58. tensorflow/include/xla/mlir_hlo/stablehlo_ext/transforms/passes.h.inc +149 -4
  59. tensorflow/include/xla/pjrt/distributed/client.h +5 -0
  60. tensorflow/include/xla/pjrt/gpu/se_gpu_pjrt_client.h +1 -92
  61. tensorflow/include/xla/pjrt/gpu/se_gpu_topology_description.h +126 -0
  62. tensorflow/include/xla/pjrt/pjrt_stream_executor_client.h +1 -49
  63. tensorflow/include/xla/pjrt/pjrt_stream_executor_device_description.h +75 -0
  64. tensorflow/include/xla/pjrt/plugin/xla_cpu/cpu_execute_options.h +57 -0
  65. tensorflow/include/xla/pjrt/plugin/xla_cpu/cpu_topology.h +4 -0
  66. tensorflow/include/xla/service/constant_value.h +1 -0
  67. tensorflow/include/xla/service/hlo_module_util.h +52 -1
  68. tensorflow/include/xla/service/hlo_proto_util.h +0 -12
  69. tensorflow/include/xla/tsl/concurrency/async_value.h +50 -21
  70. tensorflow/include/xla/tsl/framework/convolution/eigen_spatial_convolutions-inl.h +5 -5
  71. tensorflow/lite/experimental/microfrontend/python/ops/_audio_microfrontend_op.so +0 -0
  72. tensorflow/lite/python/analyzer_wrapper/_pywrap_analyzer_wrapper.pyd +0 -0
  73. tensorflow/lite/python/interpreter_wrapper/_pywrap_tensorflow_interpreter_wrapper.pyd +0 -0
  74. tensorflow/lite/python/optimize/_pywrap_tensorflow_lite_calibration_wrapper.pyd +0 -0
  75. tensorflow/python/_pywrap_dtensor_device.pyd +0 -0
  76. tensorflow/python/_pywrap_mlir.pyd +0 -0
  77. tensorflow/python/_pywrap_parallel_device.pyd +0 -0
  78. tensorflow/python/_pywrap_quantize_training.pyd +0 -0
  79. tensorflow/python/_pywrap_tensorflow_internal.pyd +0 -0
  80. tensorflow/python/_pywrap_tfcompile.pyd +0 -0
  81. tensorflow/python/_pywrap_tfe.pyd +0 -0
  82. tensorflow/python/client/_pywrap_debug_events_writer.pyd +0 -0
  83. tensorflow/python/client/_pywrap_device_lib.pyd +0 -0
  84. tensorflow/python/client/_pywrap_events_writer.pyd +0 -0
  85. tensorflow/python/client/_pywrap_tf_session.pyd +0 -0
  86. tensorflow/python/compat/compat.py +1 -1
  87. tensorflow/python/data/experimental/service/_pywrap_server_lib.pyd +0 -0
  88. tensorflow/python/data/experimental/service/_pywrap_utils_exp.pyd +0 -0
  89. tensorflow/python/eager/imperative_grad.py +5 -5
  90. tensorflow/python/eager/polymorphic_function/atomic_function.py +1 -1
  91. tensorflow/python/eager/polymorphic_function/compiler_ir.py +1 -1
  92. tensorflow/python/eager/polymorphic_function/polymorphic_function.py +45 -41
  93. tensorflow/python/eager/tape.py +2 -2
  94. tensorflow/python/framework/_dtypes.pyd +0 -0
  95. tensorflow/python/framework/_op_def_library_pybind.pyd +0 -0
  96. tensorflow/python/framework/_op_def_registry.pyd +0 -0
  97. tensorflow/python/framework/_proto_comparators.pyd +0 -0
  98. tensorflow/python/framework/_pywrap_python_op_gen.pyd +0 -0
  99. tensorflow/python/framework/_test_metrics_util.pyd +0 -0
  100. tensorflow/python/grappler/_pywrap_tf_cluster.pyd +0 -0
  101. tensorflow/python/grappler/_pywrap_tf_item.pyd +0 -0
  102. tensorflow/python/grappler/_pywrap_tf_optimizer.pyd +0 -0
  103. tensorflow/python/lib/core/_pywrap_py_func.pyd +0 -0
  104. tensorflow/python/lib/io/_pywrap_file_io.pyd +0 -0
  105. tensorflow/python/lib/io/_pywrap_record_io.pyd +0 -0
  106. tensorflow/python/ops/summary_ops_v2.py +5 -1
  107. tensorflow/python/platform/_pywrap_tf2.pyd +0 -0
  108. tensorflow/python/profiler/internal/_pywrap_profiler.pyd +0 -0
  109. tensorflow/python/profiler/internal/_pywrap_profiler_plugin.pyd +0 -0
  110. tensorflow/python/saved_model/pywrap_saved_model.pyd +0 -0
  111. tensorflow/python/tpu/_pywrap_sparse_core_layout.pyd +0 -0
  112. tensorflow/python/tpu/_pywrap_tpu_embedding.pyd +0 -0
  113. tensorflow/python/tpu/tpu_embedding_v3.py +14 -7
  114. tensorflow/python/tpu/tpu_embedding_v3_checkpoint_adapter.py +10 -1
  115. tensorflow/python/util/_pywrap_checkpoint_reader.pyd +0 -0
  116. tensorflow/python/util/_pywrap_kernel_registry.pyd +0 -0
  117. tensorflow/python/util/_pywrap_stat_summarizer.pyd +0 -0
  118. tensorflow/python/util/_pywrap_tfprof.pyd +0 -0
  119. tensorflow/python/util/_pywrap_transform_graph.pyd +0 -0
  120. tensorflow/python/util/_pywrap_utils.pyd +0 -0
  121. tensorflow/python/util/_tf_stack.pyd +0 -0
  122. tensorflow/tools/pip_package/setup.py +2 -2
  123. tensorflow/xla_aot_runtime_src/xla/tsl/concurrency/async_value.cc +26 -51
  124. {tf_nightly_cpu-2.20.0.dev20250220.dist-info → tf_nightly_cpu-2.20.0.dev20250222.dist-info}/METADATA +1 -1
  125. {tf_nightly_cpu-2.20.0.dev20250220.dist-info → tf_nightly_cpu-2.20.0.dev20250222.dist-info}/RECORD +128 -123
  126. tensorflow/include/tensorflow/compiler/xla/backends/cpu/runtime/concurrency.h +0 -77
  127. tensorflow/include/xla/backends/cpu/runtime/concurrency.h +0 -77
  128. {tf_nightly_cpu-2.20.0.dev20250220.dist-info → tf_nightly_cpu-2.20.0.dev20250222.dist-info}/WHEEL +0 -0
  129. {tf_nightly_cpu-2.20.0.dev20250220.dist-info → tf_nightly_cpu-2.20.0.dev20250222.dist-info}/entry_points.txt +0 -0
  130. {tf_nightly_cpu-2.20.0.dev20250220.dist-info → tf_nightly_cpu-2.20.0.dev20250222.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,126 @@
+ /* Copyright 2025 The OpenXLA Authors.
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ ==============================================================================*/
+ #ifndef XLA_PJRT_GPU_SE_GPU_TOPOLOGY_DESCRIPTION_H_
+ #define XLA_PJRT_GPU_SE_GPU_TOPOLOGY_DESCRIPTION_H_
+
+ #include <cstdint>
+ #include <memory>
+ #include <optional>
+ #include <string>
+ #include <utility>
+ #include <vector>
+
+ #include "absl/container/flat_hash_map.h"
+ #include "absl/status/statusor.h"
+ #include "absl/strings/string_view.h"
+ #include "absl/types/span.h"
+ #include "xla/pjrt/gpu/gpu_topology.h"
+ #include "xla/pjrt/pjrt_compiler.h"
+ #include "xla/pjrt/pjrt_device_description.h"
+ #include "xla/pjrt/pjrt_stream_executor_device_description.h"
+
+ namespace xla {
+
+ class StreamExecutorGpuTopologyDescription : public PjRtTopologyDescription {
+ public:
+ StreamExecutorGpuTopologyDescription(
+ const PjRtPlatformId platform_id, const absl::string_view platform_name,
+ std::shared_ptr<const GpuTopology> gpu_topology,
+ const absl::flat_hash_map<std::string, PjRtDeviceAttribute>& attributes =
+ {},
+ std::optional<stream_executor::GpuTargetConfigProto> target_config =
+ std::nullopt)
+ : platform_id_(platform_id),
+ platform_name_(platform_name),
+ gpu_topology_(std::move(gpu_topology)),
+ attributes_(attributes),
+ target_config_(std::move(target_config)) {}
+
+ bool operator==(const StreamExecutorGpuTopologyDescription& other) const {
+ return this->platform_id() == other.platform_id() &&
+ this->platform_name() == other.platform_name() &&
+ this->platform_version() == other.platform_version() &&
+ this->gpu_topology() == other.gpu_topology();
+ }
+
+ PjRtPlatformId platform_id() const override { return platform_id_; }
+
+ absl::string_view platform_name() const override { return platform_name_; }
+
+ absl::string_view platform_version() const override {
+ return gpu_topology_->platform_version();
+ }
+
+ std::vector<std::unique_ptr<const PjRtDeviceDescription>> DeviceDescriptions()
+ const override {
+ std::vector<std::unique_ptr<const PjRtDeviceDescription>> devices;
+ devices.reserve(gpu_topology_->number_of_devices());
+ for (const int device_id : gpu_topology_->device_ids()) {
+ devices.push_back(std::make_unique<PjRtStreamExecutorDeviceDescription>(
+ device_id, std::string(platform_version())));
+ }
+ return devices;
+ }
+
+ const GpuTopology& gpu_topology() const { return *gpu_topology_; }
+ const GpuTopology* gpu_topology_ptr() const { return gpu_topology_.get(); }
+
+ // No subslice is supported.
+ bool is_subslice_topology() const override { return false; }
+
+ absl::StatusOr<int> ProcessCount() const override {
+ return gpu_topology_->number_of_hosts();
+ }
+
+ absl::StatusOr<int> CoreCountOfDefaultType() const override {
+ return gpu_topology_->number_of_devices();
+ }
+
+ absl::StatusOr<int> LogicalDeviceCountOfDefaultType() const override {
+ return gpu_topology_->number_of_devices();
+ }
+
+ absl::StatusOr<int> CoreCountOfDefaultTypePerProcess() const override {
+ return gpu_topology_->number_of_devices();
+ }
+
+ absl::StatusOr<int> CoreCountOfDefaultTypePerChip() const override {
+ return 1;
+ }
+
+ absl::StatusOr<std::string> Serialize() const override;
+
+ const std::optional<stream_executor::GpuTargetConfigProto>& target_config()
+ const {
+ return target_config_;
+ }
+
+ // Returns vendor specific attributes about the topology.
+ const absl::flat_hash_map<std::string, PjRtDeviceAttribute>& Attributes()
+ const override {
+ return attributes_;
+ }
+
+ absl::StatusOr<Layout> GetDefaultLayout(
+ PrimitiveType element_type,
+ absl::Span<const int64_t> dims) const override;
+
+ private:
+ const PjRtPlatformId platform_id_;
+ const std::string platform_name_;
+ std::shared_ptr<const GpuTopology> gpu_topology_;
+ absl::flat_hash_map<std::string, xla::PjRtDeviceAttribute> attributes_;
+ std::optional<stream_executor::GpuTargetConfigProto> target_config_;
+ };
+ } // namespace xla
+
+ #endif // XLA_PJRT_GPU_SE_GPU_TOPOLOGY_DESCRIPTION_H_
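
The new se_gpu_topology_description.h header above is self-contained, so a caller only needs the accessors it declares. A minimal sketch, assuming a StreamExecutorGpuTopologyDescription obtained from the GPU PjRt client setup (DescribeTopology is a hypothetical helper, not part of the header):

    // Sketch only: exercises the accessors declared in the header above.
    #include <iostream>
    #include <memory>

    #include "xla/pjrt/gpu/se_gpu_topology_description.h"

    void DescribeTopology(const xla::StreamExecutorGpuTopologyDescription& topology) {
      std::cout << "platform: " << topology.platform_name() << " ("
                << topology.platform_version() << ")\n";
      // DeviceDescriptions() materializes one PjRtStreamExecutorDeviceDescription
      // per device id in the wrapped GpuTopology.
      for (const auto& device : topology.DeviceDescriptions()) {
        std::cout << "  device " << device->id() << ": " << device->device_kind()
                  << "\n";
      }
    }
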
@@ -52,9 +52,9 @@ limitations under the License.
  #include "xla/pjrt/pjrt_client.h"
  #include "xla/pjrt/pjrt_common.h"
  #include "xla/pjrt/pjrt_compiler.h"
- #include "xla/pjrt/pjrt_device_description.h"
  #include "xla/pjrt/pjrt_executable.h"
  #include "xla/pjrt/pjrt_future.h"
+ #include "xla/pjrt/pjrt_stream_executor_device_description.h"
  #include "xla/pjrt/tracked_device_buffer.h"
  #include "xla/pjrt/transpose.h"
  #include "xla/pjrt/utils.h"
@@ -77,54 +77,6 @@ limitations under the License.

  namespace xla {

- class PjRtStreamExecutorDeviceDescription : public PjRtDeviceDescription {
- public:
- explicit PjRtStreamExecutorDeviceDescription(int id, std::string device_kind,
- int process_index = 0)
- : id_(id),
- process_index_(process_index),
- device_kind_(std::move(device_kind)) {}
-
- int id() const override { return id_; }
-
- int process_index() const override { return process_index_; }
-
- absl::string_view device_kind() const override { return device_kind_; }
-
- absl::string_view ToString() const override { return to_string_; }
-
- absl::string_view DebugString() const override { return debug_string_; }
-
- absl::Span<int const> coords() const { return absl::MakeSpan(coords_); }
-
- const absl::flat_hash_map<std::string, PjRtDeviceAttribute>& Attributes()
- const override {
- return attributes_;
- }
-
- void SetAttributes(
- absl::flat_hash_map<std::string, PjRtDeviceAttribute> attributes) {
- attributes_ = std::move(attributes);
- }
-
- void SetDebugString(std::string debug_string) {
- debug_string_ = std::move(debug_string);
- }
-
- void SetToString(std::string to_string) { to_string_ = std::move(to_string); }
-
- void SetCoords(std::array<int, 1> coords) { coords_ = coords; }
-
- private:
- const int id_;
- const int process_index_;
- const std::string device_kind_;
- std::string debug_string_ = "<unknown SE device>";
- std::string to_string_ = "<unknown SE device>";
- absl::flat_hash_map<std::string, PjRtDeviceAttribute> attributes_;
- std::array<int, 1> coords_;
- };
-
  class PjRtStreamExecutorDevice : public PjRtDevice {
  public:
  explicit PjRtStreamExecutorDevice(
@@ -0,0 +1,75 @@
+ /* Copyright 2025 The OpenXLA Authors.
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ ==============================================================================*/
+ #ifndef XLA_PJRT_PJRT_STREAM_EXECUTOR_DEVICE_DESCRIPTION_H_
+ #define XLA_PJRT_PJRT_STREAM_EXECUTOR_DEVICE_DESCRIPTION_H_
+
+ #include <array>
+ #include <string>
+ #include <utility>
+
+ #include "absl/container/flat_hash_map.h"
+ #include "absl/strings/string_view.h"
+ #include "absl/types/span.h"
+ #include "xla/pjrt/pjrt_device_description.h"
+
+ namespace xla {
+
+ class PjRtStreamExecutorDeviceDescription : public PjRtDeviceDescription {
+ public:
+ explicit PjRtStreamExecutorDeviceDescription(int id, std::string device_kind,
+ int process_index = 0)
+ : id_(id),
+ process_index_(process_index),
+ device_kind_(std::move(device_kind)) {}
+
+ int id() const override { return id_; }
+
+ int process_index() const override { return process_index_; }
+
+ absl::string_view device_kind() const override { return device_kind_; }
+
+ absl::string_view ToString() const override { return to_string_; }
+
+ absl::string_view DebugString() const override { return debug_string_; }
+
+ absl::Span<int const> coords() const { return absl::MakeSpan(coords_); }
+
+ const absl::flat_hash_map<std::string, PjRtDeviceAttribute>& Attributes()
+ const override {
+ return attributes_;
+ }
+
+ void SetAttributes(
+ absl::flat_hash_map<std::string, PjRtDeviceAttribute> attributes) {
+ attributes_ = std::move(attributes);
+ }
+
+ void SetDebugString(std::string debug_string) {
+ debug_string_ = std::move(debug_string);
+ }
+
+ void SetToString(std::string to_string) { to_string_ = std::move(to_string); }
+
+ void SetCoords(std::array<int, 1> coords) { coords_ = coords; }
+
+ private:
+ const int id_;
+ const int process_index_;
+ const std::string device_kind_;
+ std::string debug_string_ = "<unknown SE device>";
+ std::string to_string_ = "<unknown SE device>";
+ absl::flat_hash_map<std::string, PjRtDeviceAttribute> attributes_;
+ std::array<int, 1> coords_;
+ };
+ } // namespace xla
+
+ #endif // XLA_PJRT_PJRT_STREAM_EXECUTOR_DEVICE_DESCRIPTION_H_
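
Since PjRtStreamExecutorDeviceDescription now lives in its own header, it can be built without pulling in the full stream-executor client. A minimal sketch; the "gpu" device kind, the coordinate value, and MakeDescription itself are arbitrary examples rather than anything prescribed by the header:

    // Sketch only: constructs a description and fills in the optional fields.
    #include <string>

    #include "xla/pjrt/pjrt_stream_executor_device_description.h"

    xla::PjRtStreamExecutorDeviceDescription MakeDescription(int id) {
      xla::PjRtStreamExecutorDeviceDescription desc(id, /*device_kind=*/"gpu",
                                                    /*process_index=*/0);
      desc.SetToString("StreamExecutorDevice(id=" + std::to_string(id) + ")");
      desc.SetDebugString("SE device #" + std::to_string(id));
      desc.SetCoords({id});  // std::array<int, 1>
      return desc;
    }
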
@@ -0,0 +1,57 @@
+ /* Copyright 2025 The OpenXLA Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ ==============================================================================*/
+
+ #ifndef XLA_PJRT_PLUGIN_XLA_CPU_CPU_EXECUTE_OPTIONS_H_
+ #define XLA_PJRT_PLUGIN_XLA_CPU_CPU_EXECUTE_OPTIONS_H_
+
+ #include <optional>
+
+ #include "xla/backends/cpu/collectives/cpu_collectives.h"
+ #include "xla/pjrt/pjrt_executable.h"
+
+ namespace xla {
+
+ // ExecuteContext for XLA:CPU PjRtLoadedExecutable::Execute calls.
+ class CpuExecuteContext : public ExecuteContext {
+ public:
+ ~CpuExecuteContext() override = default;
+
+ // If specified, override the process ID specified in
+ // `CpuClientOptions::process_id` for a particular call of
+ // PjRtLoadedExecutable::Execute.
+ //
+ // TODO(hyeontaek): Look for a collectives-agnostic way and combine this
+ // option with `ExecuteOptions::multi_slice_config`.
+ std::optional<int>& process_index() { return process_index_; }
+ std::optional<int> process_index() const { return process_index_; }
+
+ // If specified, override CPU collectives specified in
+ // `CpuClientOptions::collectives` for a particular call of
+ // PjRtLoadedExecutable::Execute. Must remain valid until the execution
+ // finishes.
+ //
+ // TODO(hyeontaek): Look for a collectives-agnostic way and combine this
+ // option with `ExecuteOptions::multi_slice_config`.
+ cpu::CpuCollectives*& collectives() { return collectives_; }
+ cpu::CpuCollectives* collectives() const { return collectives_; }
+
+ private:
+ std::optional<int> process_index_;
+ cpu::CpuCollectives* collectives_ = nullptr;
+ };
+
+ } // namespace xla
+
+ #endif // XLA_PJRT_PLUGIN_XLA_CPU_CPU_EXECUTE_OPTIONS_H_
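
A minimal sketch of how the new CpuExecuteContext might be populated; how the context reaches the executable (typically via the ExecuteContext pointer on ExecuteOptions) is assumed from the PjRt API rather than shown by this header, and ConfigureCpuExecution is a hypothetical helper:

    // Sketch only: fills in the per-call overrides declared above.
    #include "xla/pjrt/plugin/xla_cpu/cpu_execute_options.h"

    void ConfigureCpuExecution(xla::CpuExecuteContext& context,
                               xla::cpu::CpuCollectives* collectives) {
      context.process_index() = 0;          // overrides CpuClientOptions::process_id
      context.collectives() = collectives;  // must stay alive until execution ends
    }
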
@@ -69,6 +69,10 @@ inline int UnpackCpuProcessIndex(PjRtGlobalDeviceId global_device_id) {
  return global_device_id.value() / kMaxCpuDevicesPerProcess;
  }

+ inline int UnpackCpuLocalDeviceId(PjRtGlobalDeviceId global_device_id) {
+ return global_device_id.value() % kMaxCpuDevicesPerProcess;
+ }
+
  } // namespace xla

  #endif // XLA_PJRT_PLUGIN_XLA_CPU_CPU_TOPOLOGY_H_
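
The new accessor is the counterpart of UnpackCpuProcessIndex above: the division and modulo imply that a global CPU device id encodes (process_index, local_device_id) as process_index * kMaxCpuDevicesPerProcess + local_device_id. A small sketch of that round trip (RoundTrips is a hypothetical helper):

    // Sketch only: checks the division/modulo relationship implied above.
    #include "xla/pjrt/plugin/xla_cpu/cpu_topology.h"

    inline bool RoundTrips(xla::PjRtGlobalDeviceId global_device_id) {
      const int process = xla::UnpackCpuProcessIndex(global_device_id);
      const int local = xla::UnpackCpuLocalDeviceId(global_device_id);
      return process * xla::kMaxCpuDevicesPerProcess + local ==
             global_device_id.value();
    }
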
@@ -18,6 +18,7 @@ limitations under the License.

  #include <string>

+ #include "absl/base/casts.h"
  #include "absl/status/statusor.h"
  #include "xla/literal.h"
  #include "xla/util.h"
@@ -19,16 +19,67 @@ limitations under the License.
  #include <functional>
  #include <memory>
  #include <optional>
+ #include <string>

- #include "absl/status/status.h"
+ #include "absl/log/check.h"
+ #include "absl/log/log.h"
  #include "absl/status/statusor.h"
+ #include "absl/strings/string_view.h"
  #include "absl/types/span.h"
+ #include "xla/hlo/ir/hlo_module.h"
+ #include "xla/hlo/parser/hlo_parser.h"
  #include "xla/service/compiler.h"
  #include "xla/service/hlo_module_config.h"
  #include "xla/shape.h"
+ #include "xla/util.h"

  namespace xla {

+ // Converts an HloModule from the given hlo textual IR string (in
+ // HloModule::ToString format).
+ absl::StatusOr<std::unique_ptr<HloModule>> CreateModuleFromString(
+ absl::string_view hlo_string,
+ const DebugOptions& debug_options = DebugOptions::default_instance());
+
+ // Creates an HloModule from the given proto.
+ absl::StatusOr<std::unique_ptr<HloModule>> CreateModuleFromProto(
+ const HloModuleProto& proto,
+ const DebugOptions& debug_options = DebugOptions::default_instance());
+
+ // Create an HLO state from serialized representation. In addition to
+ // creating the proto with HloModule::CreateFromProto(...) it also
+ // uses HloVerifier to ensure basic invariants are held.
+ // The HLO module could be a pre-optimizations (default) or post-optimizations
+ // module, which affects how the HLO module is verified, e.g., mixed-precision
+ // is allowed in post-optimizations HLOs.
+ absl::StatusOr<std::unique_ptr<HloModule>> CreateModuleFromProto(
+ const HloModuleProto& proto, const HloModuleConfig& module_config,
+ bool is_module_post_optimizations = false);
+
+ // Reads the proto file in xla.HloProto format, creates and returns the
+ // HloModule.
+ absl::StatusOr<std::unique_ptr<HloModule>> ReadModuleFromBinaryProtoFile(
+ absl::string_view filename,
+ const DebugOptions& debug_options = DebugOptions::default_instance());
+
+ // Reads the proto file in xla.HloModule format, creates and returns the
+ // HloModule.
+ absl::StatusOr<std::unique_ptr<HloModule>> ReadModuleFromModuleBinaryProtofile(
+ absl::string_view filename, const DebugOptions& debug_options);
+
+ // Reads the HLO text dump file in HloModule::ToString format, creates and
+ // returns the HloModule.
+ absl::StatusOr<std::unique_ptr<HloModule>> ReadModuleFromHloTextFile(
+ absl::string_view filename,
+ const DebugOptions& debug_options = DebugOptions::default_instance(),
+ const HloParserOptions& options = HloParserOptions());
+
+ // Reads the proto file in xla.HloProto format, creates and returns the
+ // HloModule.
+ absl::StatusOr<std::unique_ptr<HloModule>> ReadModuleFromTextProtoFile(
+ absl::string_view hlo_file,
+ const DebugOptions& debug_options = DebugOptions::default_instance());
+
  // Creates an HloModuleConfig for a given program shape and arguments.
  // If execution_options does not set num_replicas, default_num_replicas is used.
  // num_threads is optional; if not given, intra_op_parallelism_threads not set.
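
These additions consolidate the module-construction helpers in hlo_module_util.h; the CreateModuleFromProto overload that takes an HloModuleConfig is correspondingly removed from hlo_proto_util.h in a later hunk below. A minimal sketch of the string overload, assuming the include path xla/service/hlo_module_util.h from the file list; the HLO program and ParseAddModule are arbitrary examples:

    // Sketch only: parses a tiny HLO module with the default DebugOptions.
    #include <memory>

    #include "absl/status/statusor.h"
    #include "absl/strings/string_view.h"
    #include "xla/hlo/ir/hlo_module.h"
    #include "xla/service/hlo_module_util.h"

    absl::StatusOr<std::unique_ptr<xla::HloModule>> ParseAddModule() {
      constexpr absl::string_view kHloText = R"(
        HloModule add

        ENTRY main {
          x = f32[4] parameter(0)
          y = f32[4] parameter(1)
          ROOT sum = f32[4] add(x, y)
        }
      )";
      return xla::CreateModuleFromString(kHloText);
    }
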
@@ -18,8 +18,6 @@ limitations under the License.
  #ifndef XLA_SERVICE_HLO_PROTO_UTIL_H_
  #define XLA_SERVICE_HLO_PROTO_UTIL_H_

- #include <string>
-
  #include "absl/status/status.h"
  #include "xla/hlo/ir/hlo_module.h"
  #include "xla/service/buffer_assignment.h"
@@ -35,16 +33,6 @@ HloProto MakeHloProto(const HloModule& module,
  // will not be included in the output.
  HloProto MakeHloProto(const HloModule& module);

- // Create an HLO state from serialized representation. In addition to
- // creating the proto with HloModule::CreateFromProto(...) it also
- // uses HloVerifier to ensure basic invariants are held.
- // The HLO module could be a pre-optimizations (default) or post-optimizations
- // module, which affects how the HLO module is verified, e.g., mixed-precision
- // is allowed in post-optimizations HLOs.
- absl::StatusOr<std::unique_ptr<HloModule>> CreateModuleFromProto(
- const HloModuleProto& proto, const HloModuleConfig& module_config,
- bool is_module_post_optimizations = false);
-
  // Returns the shapes of the parameters of the entry computation. Shape pointers
  // refer to shapes inside of the given HloProto.
  absl::StatusOr<std::vector<const ShapeProto*>> EntryComputationParameterShapes(
@@ -35,9 +35,6 @@ limitations under the License.
  #include "xla/tsl/platform/logging.h"

  namespace tsl {
-
- class NotifierListNode;
-
  namespace internal {

  template <typename T>
@@ -277,6 +274,8 @@ class AsyncValue {
  protected:
  friend class IndirectAsyncValue;

+ struct WaiterListNode;
+
  static constexpr uint16_t kUnknownTypeId = 0;

  // Utility template for tag dispatching.
@@ -311,7 +310,7 @@ class AsyncValue {

  void NotifyAvailable(State available_state);
  void Destroy();
- void RunWaiters(NotifierListNode* list);
+ void RunWaiters(WaiterListNode* list);

  // IsTypeIdCompatible returns true if the type value stored in this AsyncValue
  // instance can be safely cast to `T`. This is a conservative check. I.e.
@@ -369,6 +368,16 @@ class AsyncValue {
  // This is a 16-bit value that identifies the type.
  uint16_t type_id_ = 0;

+ // This is a singly linked list of nodes waiting for notification, hanging off
+ // of AsyncValue. When the value becomes available or if an error occurs, the
+ // callbacks are informed.
+ struct WaiterListNode {
+ virtual ~WaiterListNode() = default;
+ virtual void operator()() = 0;
+
+ WaiterListNode* next = nullptr;
+ };
+
  // The waiter list and the state are compacted into one single atomic word as
  // accesses to them are tightly related. To change the state from unavailable
  // (i.e. kUnconstructed or kConstructed) to available
@@ -379,7 +388,7 @@ class AsyncValue {
  // Invariant: If the state is not available, then the waiter list must be
  // nullptr.
  struct WaitersAndState {
- // We rely on the fact that all `NotifierListNode` values are aligned at
+ // We rely on the fact that all `WaiterListNode` values are aligned at
  // least to 4 bytes and we can encode state in the lowest 2 bits. We use
  // the conservative estimation of the minimal alignment of pointers returned
  // from memory allocation functions.
@@ -390,7 +399,7 @@ class AsyncValue {
  static constexpr uintptr_t kStateMask = (1ull << 2) - 1;
  static constexpr uintptr_t kPointerMask = ~kStateMask;

- WaitersAndState(NotifierListNode* ptr, State state) {
+ WaitersAndState(WaiterListNode* ptr, State state) {
  value = (reinterpret_cast<uintptr_t>(ptr) & kPointerMask) |
  (state & kStateMask);
  }
@@ -399,8 +408,8 @@ class AsyncValue {
  return State(static_cast<State::StateEnum>(value & kStateMask));
  }

- NotifierListNode* waiter() const {
- return reinterpret_cast<NotifierListNode*>(value & kPointerMask);
+ WaiterListNode* waiter() const {
+ return reinterpret_cast<WaiterListNode*>(value & kPointerMask);
  }

  uintptr_t value;
@@ -466,8 +475,26 @@ class AsyncValue {
  return (*type_info_table)[type_id_ - 1];
  }

- void EnqueueWaiter(absl::AnyInvocable<void()> waiter,
- WaitersAndState old_value);
+ // Adds a waiter list node to the waiter linked list. If the value is
+ // available or becomes available, this calls the waiter immediately.
+ // Otherwise, we add waiter to the list where it will be called when the value
+ // becomes available.
+ void EnqueueWaiterListNode(WaiterListNode* waiter,
+ WaitersAndState waiters_and_state);
+
+ template <typename Waiter>
+ void EnqueueWaiter(Waiter&& waiter, WaitersAndState waiters_and_state) {
+ static_assert(std::is_invocable_v<Waiter>, "Waiter must be invocable");
+
+ struct Node final : public WaiterListNode {
+ explicit Node(Waiter waiter) : waiter(std::move(waiter)) {}
+ void operator()() final { waiter(); }
+ Waiter waiter;
+ };
+
+ EnqueueWaiterListNode(new Node{std::forward<Waiter>(waiter)},
+ waiters_and_state);
+ }

  // This is a global counter of the number of AsyncValue instances currently
  // live in the process. This is intended to be used for debugging only, and
@@ -983,14 +1010,15 @@ void AsyncValue::AndThen(Waiter&& waiter) {
  // Clients generally want to use AndThen without them each having to check
  // to see if the value is present. Check for them, and immediately run the
  // waiter if it is already here.
- auto old_value = waiters_and_state_.load(std::memory_order_acquire);
- if (old_value.state() == State::kConcrete ||
- old_value.state() == State::kError) {
- DCHECK_EQ(old_value.waiter(), nullptr);
+ auto waiters_and_state = waiters_and_state_.load(std::memory_order_acquire);
+ if (waiters_and_state.state() == State::kConcrete ||
+ waiters_and_state.state() == State::kError) {
+ DCHECK_EQ(waiters_and_state.waiter(), nullptr);
  waiter();
  return;
  }
- EnqueueWaiter(std::forward<Waiter>(waiter), old_value);
+
+ EnqueueWaiter(std::forward<Waiter>(waiter), waiters_and_state);
  }

  template <typename Waiter>
@@ -998,18 +1026,19 @@ void AsyncValue::AndThen(Executor& executor, Waiter&& waiter) {
  // Clients generally want to use AndThen without them each having to check
  // to see if the value is present. Check for them, and immediately run the
  // waiter if it is already here.
- auto old_value = waiters_and_state_.load(std::memory_order_acquire);
- if (old_value.state() == State::kConcrete ||
- old_value.state() == State::kError) {
- DCHECK_EQ(old_value.waiter(), nullptr);
+ auto waiters_and_state = waiters_and_state_.load(std::memory_order_acquire);
+ if (waiters_and_state.state() == State::kConcrete ||
+ waiters_and_state.state() == State::kError) {
+ DCHECK_EQ(waiters_and_state.waiter(), nullptr);
  executor.Execute(std::forward<Waiter>(waiter));
  return;
  }
+
  EnqueueWaiter(
- [&executor, waiter = std::forward<Waiter>(waiter)]() mutable {
+ [&executor, waiter = std::forward<Waiter>(waiter)] {
  executor.Execute(std::move(waiter));
  },
- old_value);
+ waiters_and_state);
  }

  inline void AsyncValue::Destroy() {
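
The switch from the typed NotifierListNode to the nested WaiterListNode does not change the AndThen contract: a waiter still runs inline if the value is already concrete or in error, and is otherwise wrapped in a heap-allocated node on the waiter list and run on the state transition. A minimal sketch, assuming the AsyncValueRef helpers from async_value_ref.h in the same concurrency library:

    // Sketch only: one deferred waiter, fired when the value becomes concrete.
    #include <iostream>

    #include "xla/tsl/concurrency/async_value_ref.h"

    void Demo() {
      tsl::AsyncValueRef<int> value = tsl::MakeUnconstructedAsyncValueRef<int>();

      // Not available yet, so this callback lands on the waiter list as a node.
      value.AndThen([value] { std::cout << "value = " << *value << "\n"; });

      // Transition to the concrete state; the queued waiter runs here.
      value.emplace(42);
    }
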
@@ -1604,12 +1604,12 @@ SpatialConvolution(const Input& input, const Kernel& kernel,
  Index padding_left = 0, Index padding_right = 0) {
  typedef typename internal::traits<Input>::Index TensorIndex;
  typedef typename internal::traits<Input>::Scalar InputScalar;
- TensorRef<Tensor<InputScalar, internal::traits<Input>::NumDimensions,
- internal::traits<Input>::Layout, TensorIndex> >
+ TensorRef<const Tensor<InputScalar, internal::traits<Input>::NumDimensions,
+ internal::traits<Input>::Layout, TensorIndex> >
  in(input);
- TensorRef<Tensor<typename internal::traits<Kernel>::Scalar,
- internal::traits<Kernel>::NumDimensions,
- internal::traits<Kernel>::Layout, TensorIndex> >
+ TensorRef<const Tensor<typename internal::traits<Kernel>::Scalar,
+ internal::traits<Kernel>::NumDimensions,
+ internal::traits<Kernel>::Layout, TensorIndex> >
  kern(kernel);

  EIGEN_STATIC_ASSERT(
@@ -35,8 +35,6 @@ class GetElementAtIndexOp : public AsyncOpKernel {
  OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_));
  }

- ~GetElementAtIndexOp() override {}
-
  void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override {
  unbounded_threadpool_.Schedule([this, ctx, done = std::move(done)]() {
  ctx->SetStatus(DoCompute(ctx));
@@ -68,8 +68,8 @@ struct GlimpseExtractionOp {
  template <typename Input>
  DSizes<Index, 4> dimensions(const Input& input) const {
  typedef typename internal::traits<Input>::Index IndexType;
- typedef TensorRef<Tensor<typename internal::traits<Input>::Scalar, 4,
- internal::traits<Input>::Layout, IndexType> >
+ typedef TensorRef<const Tensor<typename internal::traits<Input>::Scalar, 4,
+ internal::traits<Input>::Layout, IndexType> >
  Ref;
  Ref in(input);

@@ -86,8 +86,8 @@ struct GlimpseExtractionOp {
  EIGEN_DEVICE_FUNC void eval(const Input& input, Output& output,
  const Device& device) const {
  typedef typename internal::traits<Input>::Index IndexType;
- typedef TensorRef<Tensor<typename internal::traits<Input>::Scalar, 4,
- internal::traits<Input>::Layout, IndexType> >
+ typedef TensorRef<const Tensor<typename internal::traits<Input>::Scalar, 4,
+ internal::traits<Input>::Layout, IndexType> >
  Ref;
  Ref in(input);
  const Index num_channels = in.dimension(0);
@@ -394,13 +394,13 @@ CuboidConvolutionBackwardKernel(
  const DenseIndex stridePlanes = 1, const DenseIndex strideRows = 1,
  const DenseIndex strideCols = 1) {
  typedef typename internal::traits<Input>::Index TensorIndex;
- TensorRef<Tensor<typename internal::traits<Input>::Scalar,
- internal::traits<Input>::NumDimensions,
- internal::traits<Input>::Layout, TensorIndex>>
+ TensorRef<const Tensor<typename internal::traits<Input>::Scalar,
+ internal::traits<Input>::NumDimensions,
+ internal::traits<Input>::Layout, TensorIndex>>
  in(input);
- TensorRef<Tensor<typename internal::traits<OutputBackward>::Scalar,
- internal::traits<OutputBackward>::NumDimensions,
- internal::traits<OutputBackward>::Layout, TensorIndex>>
+ TensorRef<const Tensor<typename internal::traits<OutputBackward>::Scalar,
+ internal::traits<OutputBackward>::NumDimensions,
+ internal::traits<OutputBackward>::Layout, TensorIndex>>
  out(output_backward);

  EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout ==