PyPI - tf-nightly-cpu - Versions diffs - 2.20.0.dev20250220__cp39-cp39-win_amd64.whl → 2.20.0.dev20250222__cp39-cp39-win_amd64.whl - Mend

tf-nightly-cpu 2.20.0.dev20250220__cp39-cp39-win_amd64.whl → 2.20.0.dev20250222__cp39-cp39-win_amd64.whl

Files changed (130) hide show

tensorflow/include/xla/service/hlo_module_util.h CHANGED Viewed

@@ -19,16 +19,67 @@ limitations under the License.
 #include <functional>
 #include <memory>
 #include <optional>
+#include <string>
-#include "absl/status/status.h"
+#include "absl/log/check.h"
+#include "absl/log/log.h"
 #include "absl/status/statusor.h"
+#include "absl/strings/string_view.h"
 #include "absl/types/span.h"
+#include "xla/hlo/ir/hlo_module.h"
+#include "xla/hlo/parser/hlo_parser.h"
 #include "xla/service/compiler.h"
 #include "xla/service/hlo_module_config.h"
 #include "xla/shape.h"
+#include "xla/util.h"
 namespace xla {
+// Creates an HloModule from the given HLO textual IR string (in
+// HloModule::ToString format).
+absl::StatusOr<std::unique_ptr<HloModule>> CreateModuleFromString(
+    absl::string_view hlo_string,
+    const DebugOptions& debug_options = DebugOptions::default_instance());
+// Creates an HloModule from the given proto.
+absl::StatusOr<std::unique_ptr<HloModule>> CreateModuleFromProto(
+    const HloModuleProto& proto,
+    const DebugOptions& debug_options = DebugOptions::default_instance());
+// Create an HLO state from serialized representation. In addition to
+// creating the proto with HloModule::CreateFromProto(...) it also
+// uses HloVerifier to ensure basic invariants are held.
+// The HLO module could be a pre-optimizations (default) or post-optimizations
+// module, which affects how the HLO module is verified, e.g., mixed-precision
+// is allowed in post-optimizations HLOs.
+absl::StatusOr<std::unique_ptr<HloModule>> CreateModuleFromProto(
+    const HloModuleProto& proto, const HloModuleConfig& module_config,
+    bool is_module_post_optimizations = false);
+// Reads the binary proto file in xla.HloProto format, creates and returns the
+// HloModule.
+absl::StatusOr<std::unique_ptr<HloModule>> ReadModuleFromBinaryProtoFile(
+    absl::string_view filename,
+    const DebugOptions& debug_options = DebugOptions::default_instance());
+// Reads the proto file in xla.HloModule format, creates and returns the
+// HloModule.
+absl::StatusOr<std::unique_ptr<HloModule>> ReadModuleFromModuleBinaryProtofile(
+    absl::string_view filename, const DebugOptions& debug_options);
+// Reads the HLO text dump file in HloModule::ToString format, creates and
+// returns the HloModule.
+absl::StatusOr<std::unique_ptr<HloModule>> ReadModuleFromHloTextFile(
+    absl::string_view filename,
+    const DebugOptions& debug_options = DebugOptions::default_instance(),
+    const HloParserOptions& options = HloParserOptions());
+// Reads the text proto file in xla.HloProto format, creates and returns the
+// HloModule.
+absl::StatusOr<std::unique_ptr<HloModule>> ReadModuleFromTextProtoFile(
+    absl::string_view hlo_file,
+    const DebugOptions& debug_options = DebugOptions::default_instance());
 // Creates an HloModuleConfig for a given program shape and arguments.
 // If execution_options does not set num_replicas, default_num_replicas is used.
 // num_threads is optional; if not given, intra_op_parallelism_threads not set.

tensorflow/include/xla/service/hlo_proto_util.h CHANGED Viewed

@@ -18,8 +18,6 @@ limitations under the License.
 #ifndef XLA_SERVICE_HLO_PROTO_UTIL_H_
 #define XLA_SERVICE_HLO_PROTO_UTIL_H_
-#include <string>
 #include "absl/status/status.h"
 #include "xla/hlo/ir/hlo_module.h"
 #include "xla/service/buffer_assignment.h"
@@ -35,16 +33,6 @@ HloProto MakeHloProto(const HloModule& module,
 // will not be included in the output.
 HloProto MakeHloProto(const HloModule& module);
-// Create an HLO state from serialized representation. In addition to
-// creating the proto with HloModule::CreateFromProto(...) it also
-// uses HloVerifier to ensure basic invariants are held.
-// The HLO module could be a pre-optimizations (default) or post-optimizations
-// module, which affects how the HLO module is verified, e.g., mixed-precision
-// is allowed in post-optimizations HLOs.
-absl::StatusOr<std::unique_ptr<HloModule>> CreateModuleFromProto(
-    const HloModuleProto& proto, const HloModuleConfig& module_config,
-    bool is_module_post_optimizations = false);
 // Returns the shapes of the parameters of the entry computation. Shape pointers
 // refer to shapes inside of the given HloProto.
 absl::StatusOr<std::vector<const ShapeProto*>> EntryComputationParameterShapes(

tensorflow/include/xla/tsl/concurrency/async_value.h CHANGED Viewed

@@ -35,9 +35,6 @@ limitations under the License.
 #include "xla/tsl/platform/logging.h"
 namespace tsl {
-class NotifierListNode;
 namespace internal {
 template <typename T>
@@ -277,6 +274,8 @@ class AsyncValue {
  protected:
   friend class IndirectAsyncValue;
+  struct WaiterListNode;
   static constexpr uint16_t kUnknownTypeId = 0;
   // Utility template for tag dispatching.
@@ -311,7 +310,7 @@ class AsyncValue {
   void NotifyAvailable(State available_state);
   void Destroy();
-  void RunWaiters(NotifierListNode* list);
+  void RunWaiters(WaiterListNode* list);
   // IsTypeIdCompatible returns true if the type value stored in this AsyncValue
   // instance can be safely cast to `T`. This is a conservative check. I.e.
@@ -369,6 +368,16 @@ class AsyncValue {
   // This is a 16-bit value that identifies the type.
   uint16_t type_id_ = 0;
+  // This is a singly linked list of nodes waiting for notification, hanging off
+  // of AsyncValue. When the value becomes available or if an error occurs, the
+  // callbacks are informed.
+  struct WaiterListNode {
+    virtual ~WaiterListNode() = default;
+    virtual void operator()() = 0;
+    WaiterListNode* next = nullptr;
+  };
   // The waiter list and the state are compacted into one single atomic word as
   // accesses to them are tightly related. To change the state from unavailable
   // (i.e. kUnconstructed or kConstructed) to available
@@ -379,7 +388,7 @@ class AsyncValue {
   // Invariant: If the state is not available, then the waiter list must be
   // nullptr.
   struct WaitersAndState {
-    // We rely on the fact that all `NotifierListNode` values are aligned at
+    // We rely on the fact that all `WaiterListNode` values are aligned at
     // least to 4 bytes and we can encode state in the lowest 2 bits. We use
     // the conservative estimation of the minimal alignment of pointers returned
     // from memory allocation functions.
@@ -390,7 +399,7 @@ class AsyncValue {
     static constexpr uintptr_t kStateMask = (1ull << 2) - 1;
     static constexpr uintptr_t kPointerMask = ~kStateMask;
-    WaitersAndState(NotifierListNode* ptr, State state) {
+    WaitersAndState(WaiterListNode* ptr, State state) {
       value = (reinterpret_cast<uintptr_t>(ptr) & kPointerMask) |
               (state & kStateMask);
     }
@@ -399,8 +408,8 @@ class AsyncValue {
       return State(static_cast<State::StateEnum>(value & kStateMask));
     }
-    NotifierListNode* waiter() const {
-      return reinterpret_cast<NotifierListNode*>(value & kPointerMask);
+    WaiterListNode* waiter() const {
+      return reinterpret_cast<WaiterListNode*>(value & kPointerMask);
     }
     uintptr_t value;
@@ -466,8 +475,26 @@ class AsyncValue {
     return (*type_info_table)[type_id_ - 1];
   }
-  void EnqueueWaiter(absl::AnyInvocable<void()> waiter,
-                     WaitersAndState old_value);
+  // Adds a waiter list node to the waiter linked list. If the value is
+  // available or becomes available, this calls the waiter immediately.
+  // Otherwise, we add waiter to the list where it will be called when the value
+  // becomes available.
+  void EnqueueWaiterListNode(WaiterListNode* waiter,
+                             WaitersAndState waiters_and_state);
+  template <typename Waiter>
+  void EnqueueWaiter(Waiter&& waiter, WaitersAndState waiters_and_state) {
+    static_assert(std::is_invocable_v<Waiter>, "Waiter must be invocable");
+    struct Node final : public WaiterListNode {
+      explicit Node(Waiter waiter) : waiter(std::move(waiter)) {}
+      void operator()() final { waiter(); }
+      Waiter waiter;
+    };
+    EnqueueWaiterListNode(new Node{std::forward<Waiter>(waiter)},
+                          waiters_and_state);
+  }
   // This is a global counter of the number of AsyncValue instances currently
   // live in the process.  This is intended to be used for debugging only, and
@@ -983,14 +1010,15 @@ void AsyncValue::AndThen(Waiter&& waiter) {
   // Clients generally want to use AndThen without them each having to check
   // to see if the value is present. Check for them, and immediately run the
   // waiter if it is already here.
-  auto old_value = waiters_and_state_.load(std::memory_order_acquire);
-  if (old_value.state() == State::kConcrete ||
-      old_value.state() == State::kError) {
-    DCHECK_EQ(old_value.waiter(), nullptr);
+  auto waiters_and_state = waiters_and_state_.load(std::memory_order_acquire);
+  if (waiters_and_state.state() == State::kConcrete ||
+      waiters_and_state.state() == State::kError) {
+    DCHECK_EQ(waiters_and_state.waiter(), nullptr);
     waiter();
     return;
   }
-  EnqueueWaiter(std::forward<Waiter>(waiter), old_value);
+  EnqueueWaiter(std::forward<Waiter>(waiter), waiters_and_state);
 }
 template <typename Waiter>
@@ -998,18 +1026,19 @@ void AsyncValue::AndThen(Executor& executor, Waiter&& waiter) {
   // Clients generally want to use AndThen without them each having to check
   // to see if the value is present. Check for them, and immediately run the
   // waiter if it is already here.
-  auto old_value = waiters_and_state_.load(std::memory_order_acquire);
-  if (old_value.state() == State::kConcrete ||
-      old_value.state() == State::kError) {
-    DCHECK_EQ(old_value.waiter(), nullptr);
+  auto waiters_and_state = waiters_and_state_.load(std::memory_order_acquire);
+  if (waiters_and_state.state() == State::kConcrete ||
+      waiters_and_state.state() == State::kError) {
+    DCHECK_EQ(waiters_and_state.waiter(), nullptr);
     executor.Execute(std::forward<Waiter>(waiter));
     return;
   }
   EnqueueWaiter(
-      [&executor, waiter = std::forward<Waiter>(waiter)]() mutable {
+      [&executor, waiter = std::forward<Waiter>(waiter)] {
         executor.Execute(std::move(waiter));
       },
-      old_value);
+      waiters_and_state);
 }
 inline void AsyncValue::Destroy() {

tensorflow/include/xla/tsl/framework/convolution/eigen_spatial_convolutions-inl.h CHANGED Viewed

@@ -1604,12 +1604,12 @@ SpatialConvolution(const Input& input, const Kernel& kernel,
                    Index padding_left = 0, Index padding_right = 0) {
   typedef typename internal::traits<Input>::Index TensorIndex;
   typedef typename internal::traits<Input>::Scalar InputScalar;
-  TensorRef<Tensor<InputScalar, internal::traits<Input>::NumDimensions,
-                   internal::traits<Input>::Layout, TensorIndex> >
+  TensorRef<const Tensor<InputScalar, internal::traits<Input>::NumDimensions,
+                         internal::traits<Input>::Layout, TensorIndex> >
       in(input);
-  TensorRef<Tensor<typename internal::traits<Kernel>::Scalar,
-                   internal::traits<Kernel>::NumDimensions,
-                   internal::traits<Kernel>::Layout, TensorIndex> >
+  TensorRef<const Tensor<typename internal::traits<Kernel>::Scalar,
+                         internal::traits<Kernel>::NumDimensions,
+                         internal::traits<Kernel>::Layout, TensorIndex> >
       kern(kernel);
   EIGEN_STATIC_ASSERT(

tensorflow/lite/experimental/microfrontend/python/ops/_audio_microfrontend_op.so CHANGED Viewed

Binary file

tensorflow/lite/python/analyzer_wrapper/_pywrap_analyzer_wrapper.pyd CHANGED Viewed

Binary file

tensorflow/lite/python/interpreter_wrapper/_pywrap_tensorflow_interpreter_wrapper.pyd CHANGED Viewed

Binary file

tensorflow/lite/python/optimize/_pywrap_tensorflow_lite_calibration_wrapper.pyd CHANGED Viewed

Binary file

tensorflow/python/_pywrap_dtensor_device.pyd CHANGED Viewed

Binary file

tensorflow/python/_pywrap_mlir.pyd CHANGED Viewed

Binary file

tensorflow/python/_pywrap_parallel_device.pyd CHANGED Viewed

Binary file

tensorflow/python/_pywrap_quantize_training.pyd CHANGED Viewed

Binary file

tensorflow/python/_pywrap_tensorflow_internal.pyd CHANGED Viewed

Binary file

tensorflow/python/_pywrap_tfcompile.pyd CHANGED Viewed

Binary file

tensorflow/python/_pywrap_tfe.pyd CHANGED Viewed

Binary file

tensorflow/python/client/_pywrap_debug_events_writer.pyd CHANGED Viewed

Binary file

tensorflow/python/client/_pywrap_device_lib.pyd CHANGED Viewed

Binary file

tensorflow/python/client/_pywrap_events_writer.pyd CHANGED Viewed

Binary file

tensorflow/python/client/_pywrap_tf_session.pyd CHANGED Viewed

Binary file

tensorflow/python/compat/compat.py CHANGED Viewed

@@ -29,7 +29,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2025, 2, 19)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2025, 2, 21)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS"
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None

tensorflow/python/data/experimental/service/_pywrap_server_lib.pyd CHANGED Viewed

Binary file

tensorflow/python/data/experimental/service/_pywrap_utils_exp.pyd CHANGED Viewed

Binary file

tensorflow/python/eager/imperative_grad.py CHANGED Viewed

@@ -43,13 +43,13 @@ def imperative_grad(tape,
    target: either a Tensor or list of Tensors to be differentiated.
    sources: list of Tensors for which we want gradients
    output_gradients: if not None, a list of gradient provided for each Target,
-    or None if we are to use the target's computed downstream gradient.
+     or None if we are to use the target's computed downstream gradient.
    sources_raw: if not None, a list of the source python objects from which the
-    sources were generated. Should have the same length as sources. Only needs
-    to be populated if unconnected_gradients is 'zero'.
+     sources were generated. Should have the same length as sources. Only needs
+     to be populated if unconnected_gradients is 'zero'.
    unconnected_gradients: determines the value returned if the target and
-    sources are unconnected. When 'none' the value returned is None wheras when
-    'zero' a zero tensor in the same shape as the sources is returned.
+     sources are unconnected. When 'none' the value returned is None whereas
+     when 'zero' a zero tensor in the same shape as the sources is returned.
   Returns:
    the gradient wrt each of the sources.

tensorflow/python/eager/polymorphic_function/atomic_function.py CHANGED Viewed

@@ -55,7 +55,7 @@ class CallOptions:
   # Used by ACD to list Ops/Tensors/Callables that must be called in advance.
   control_captures: List[Any] = dataclasses.field(default_factory=list)
-  # Determines what kind of partitoned call is used for this function.
+  # Determines what kind of partitioned call is used for this function.
   is_stateful: bool = False

tensorflow/python/eager/polymorphic_function/compiler_ir.py CHANGED Viewed

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Implmentation for defining get_compiler_ir."""
+"""Implementation for defining get_compiler_ir."""
 from typing import List, Optional
 import warnings

tensorflow/python/eager/polymorphic_function/polymorphic_function.py CHANGED Viewed

@@ -966,7 +966,7 @@ class Function(core.PolymorphicFunction, trackable.Trackable):
     def _check_inputs(args, kwargs):
       all_inputs = list(args) + list(kwargs.values())
-      # Emtpy input is okay.
+      # Empty input is okay.
       if not all_inputs:
         return
       if any(map(is_tensor_spec, all_inputs)) and any(
@@ -1423,7 +1423,8 @@ def function(
   thought of as compile-time constants), and builds a separate `tf.Graph` for
   each set of Python arguments that it encounters.
   For more information, see the
-  [tf.function guide](https://www.tensorflow.org/guide/function#rules_of_tracing)
+  [tf.function
+  guide](https://www.tensorflow.org/guide/function#rules_of_tracing)
   Executing a `PolymorphicFunction` will select and execute the appropriate
   `ConcreteFunction` based on the argument types and values.
@@ -1440,14 +1441,17 @@ def function(
   >>> isinstance(f.get_concrete_function(1).graph, tf.Graph)
   True
-  `ConcreteFunction`s can be executed just like `PolymorphicFunction`s, but their
-  input is resticted to the types to which they're specialized.
+  `ConcreteFunction`s can be executed just like `PolymorphicFunction`s, but
+  their
+  input is restricted to the types to which they're specialized.
   ## Retracing
-  `ConcreteFunctions` are built (traced) on the fly, as the `PolymorphicFunction` is
+  `ConcreteFunctions` are built (traced) on the fly, as the
+  `PolymorphicFunction` is
   called with new TensorFlow types or shapes, or with new Python values as
-  arguments. When `PolymorphicFunction` builds a new trace, it is said that `func`
+  arguments. When `PolymorphicFunction` builds a new trace, it is said that
+  `func`
   is retraced. Retracing is a frequent performance concern for `tf.function` as
   it can be considerably slower than executing a graph that's already been
   traced. It is ideal to minimize the amount of retracing in your code.
@@ -1473,7 +1477,8 @@ def function(
   ## Input signatures
-  For Tensor arguments, `PolymorphicFunction`creates a new `ConcreteFunction` for
+  For Tensor arguments, `PolymorphicFunction` creates a new `ConcreteFunction`
+  for
   every unique set of input shapes and datatypes. The example below creates two
   separate `ConcreteFunction`s, each specialized to a different shape:
@@ -1580,59 +1585,58 @@ def function(
       `func` must be a `Tensor`, and `func` cannot accept `**kwargs`.
     autograph: Whether autograph should be applied on `func` before tracing a
       graph. Data-dependent Python control flow statements require
-      `autograph=True`. For more information, see the
-      [tf.function and AutoGraph guide](
+      `autograph=True`. For more information, see the [tf.function and AutoGraph
+      guide](
       https://www.tensorflow.org/guide/function#autograph_transformations).
     jit_compile: If `True`, compiles the function using
       [XLA](https://tensorflow.org/xla). XLA performs compiler optimizations,
       such as fusion, and attempts to emit more efficient code. This may
-      drastically improve the performance. If set to `True`,
-      the whole function needs to be compilable by XLA, or an
-      `errors.InvalidArgumentError` is thrown.
-      If `None` (default), compiles the function with XLA when running on TPU
-      and goes through the regular function execution path when running on
-      other devices.
-      If `False`, executes the function without XLA compilation.  Set this value
-      to `False` when directly running a multi-device function on TPUs (e.g. two
-      TPU cores, one TPU core and its host CPU).
-      Not all functions are compilable, see a list of
-      [sharp corners](https://tensorflow.org/xla/known_issues).
-    reduce_retracing: When True, `tf.function` attempts to reduce the
-      amount of retracing, for example by using more generic shapes. This
-      can be controlled for user objects by customizing their associated
+      drastically improve the performance. If set to `True`, the whole function
+      needs to be compilable by XLA, or an `errors.InvalidArgumentError` is
+      thrown. If `None` (default), compiles the function with XLA when running
+      on TPU and goes through the regular function execution path when running
+      on other devices. If `False`, executes the function without XLA
+      compilation.  Set this value to `False` when directly running a
+      multi-device function on TPUs (e.g. two TPU cores, one TPU core and its
+      host CPU). Not all functions are compilable, see a list of [sharp
+      corners](https://tensorflow.org/xla/known_issues).
+    reduce_retracing: When True, `tf.function` attempts to reduce the amount of
+      retracing, for example by using more generic shapes. This can be
+      controlled for user objects by customizing their associated
       `tf.types.experimental.TraceType`.
     experimental_implements: If provided, contains a name of a "known" function
-      this implements. For example "mycompany.my_recurrent_cell".
-      This is stored as an attribute in inference function,
-      which can then be detected when processing serialized function.
-      See [standardizing composite ops](https://github.com/tensorflow/community/blob/master/rfcs/20190610-standardizing-composite_ops.md)  # pylint: disable=line-too-long
-      for details.  For an example of utilizing this attribute see this
+      this implements. For example "mycompany.my_recurrent_cell". This is stored
+      as an attribute in inference function, which can then be detected when
+      processing serialized function. See [standardizing composite
+      ops](https://github.com/tensorflow/community/blob/master/rfcs/20190610-standardizing-composite_ops.md)
+      for details.  For an example of utilizing this attribute see this
       [example](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc)
       The code above automatically detects and substitutes function that
       implements "embedded_matmul" and allows TFLite to substitute its own
-      implementations. For instance, a tensorflow user can use this
-       attribute to mark that their function also implements
-      `embedded_matmul` (perhaps more efficiently!)
-      by specifying it using this parameter:
-      `@tf.function(experimental_implements="embedded_matmul")`
-      This can either be specified as just the string name of the function or
-      a NameAttrList corresponding to a list of key-value attributes associated
-      with the function name. The name of the function will be in the 'name'
-      field of the NameAttrList. To define a formal TF op for this function
-      implements, try the experimental [composite TF](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/compiler/mlir/tfr)
+      implementations. For instance, a tensorflow user can use this attribute to
+      mark that their function also implements `embedded_matmul` (perhaps more
+      efficiently!) by specifying it using this parameter:
+      `@tf.function(experimental_implements="embedded_matmul")` This can either
+      be specified as just the string name of the function or a NameAttrList
+      corresponding to a list of key-value attributes associated with the
+      function name. The name of the function will be in the 'name' field of the
+      NameAttrList. To define a formal TF op for this function implements, try
+      the experimental [composite
+      TF](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/compiler/mlir/tfr)
       project.
     experimental_autograph_options: Optional tuple of
       `tf.autograph.experimental.Feature` values.
     experimental_attributes: Optional dictionary of attributes to include in the
       generated FunctionDefs.
-    experimental_relax_shapes: Deprecated. Use `reduce_retracing`
-      instead.
+    experimental_relax_shapes: Deprecated. Use `reduce_retracing` instead.
     experimental_compile: Deprecated alias to 'jit_compile'.
     experimental_follow_type_hints: Deprecated. Please use input_signature or
       reduce_retracing instead.
   Returns:
-     If `func` is not None, returns a `tf.types.experimental.PolymorphicFunction`.
+     If `func` is not None, returns a
+     `tf.types.experimental.PolymorphicFunction`.
      If `func` is None, returns a decorator that, when invoked with a single
      `func` argument, returns a `tf.types.experimental.PolymorphicFunction`.

tensorflow/python/eager/tape.py CHANGED Viewed

@@ -48,9 +48,9 @@ def watch(tape, tensor):
 def default_get_variables(variable):
   return [variable]
-# Gets a list of changed variables. Can be overriden using
+# Gets a list of changed variables. Can be overridden using
 # register_variables_override. An example of overriding is for getting the
-# varibles within a distributed context.
+# variables within a distributed context.
 _variables_override = default_get_variables

tensorflow/python/framework/_dtypes.pyd CHANGED Viewed

Binary file

tensorflow/python/framework/_op_def_library_pybind.pyd CHANGED Viewed

Binary file

tensorflow/python/framework/_op_def_registry.pyd CHANGED Viewed

Binary file

tensorflow/python/framework/_proto_comparators.pyd CHANGED Viewed

Binary file

tensorflow/python/framework/_pywrap_python_op_gen.pyd CHANGED Viewed

Binary file

tensorflow/python/framework/_test_metrics_util.pyd CHANGED Viewed

Binary file

tensorflow/python/grappler/_pywrap_tf_cluster.pyd CHANGED Viewed

Binary file

tensorflow/python/grappler/_pywrap_tf_item.pyd CHANGED Viewed

Binary file

tensorflow/python/grappler/_pywrap_tf_optimizer.pyd CHANGED Viewed

Binary file

tensorflow/python/lib/core/_pywrap_py_func.pyd CHANGED Viewed

Binary file

tensorflow/python/lib/io/_pywrap_file_io.pyd CHANGED Viewed

Binary file

tensorflow/python/lib/io/_pywrap_record_io.pyd CHANGED Viewed

Binary file

tensorflow/python/ops/summary_ops_v2.py CHANGED Viewed

@@ -151,7 +151,11 @@ def _legacy_contrib_should_record_summaries():
 def is_recording_summaries():
   """Returns non-Tensor boolean indicating if summaries are being recorded."""
-  return _summary_state.is_recording is not None and _summary_state.is_recording
+  if _summary_state.writer is None:
+    return False
+  if _summary_state.is_recording is None:
+    return False
+  return _summary_state.is_recording
 @tf_export("summary.record_if", v1=[])

tensorflow/python/platform/_pywrap_tf2.pyd CHANGED Viewed

Binary file

tensorflow/python/profiler/internal/_pywrap_profiler.pyd CHANGED Viewed

Binary file

tensorflow/python/profiler/internal/_pywrap_profiler_plugin.pyd CHANGED Viewed

Binary file

tensorflow/python/saved_model/pywrap_saved_model.pyd CHANGED Viewed

Binary file

tensorflow/python/tpu/_pywrap_sparse_core_layout.pyd CHANGED Viewed

Binary file

tensorflow/python/tpu/_pywrap_tpu_embedding.pyd CHANGED Viewed

Binary file

tensorflow/python/tpu/tpu_embedding_v3.py CHANGED Viewed

@@ -67,6 +67,7 @@ from tensorflow.python.util.tf_export import tf_export
 _PIPELINE_ATTRIBUTE = "_embedding_pipelining"
 _PIPELINE_MODE_FORWARD = "forward"
 _PIPELINE_MODE_BACKWARD = "backward"
+_PIPELINE_MODEL_SEQUENTIAL = "_sequential"
 TableConfig = tpu_embedding_v2_utils.TableConfig
@@ -95,15 +96,21 @@ class EmbeddingPipeliningContext(control_flow_ops.ControlFlowContext):
     super().__init__()
     self._name = "EmbeddingPipelinigContext"
     self._mode = attr_value_pb2.AttrValue(s=compat.as_bytes(mode))
+    self._enable = enable
     recording_summaries = summary_ops_v2.is_recording_summaries()
+    if not isinstance(recording_summaries, bool):
+      # We can't handle predicate functions at this point. So, we'll ignore the
+      # special casing of summary recording because, presumably, this is not
+      # a single step loop so pipelining is still valid.
+      recording_summaries = False
     if enable and recording_summaries:
-      logging.info(
-          "Embedding pipelining requested but summaries are being recorded:"
-          " Disabling embedding pipelining."
+      # We'll still flag these ops for the SC forward/backward pass, but we'll
+      # run them sequentially. This has to be handled in the MLIR passes
+      # embedding_pipelining.cc and embedding_sequencing.cc.
+      logging.info("Summary recording detected, disabling pipelining.")
+      self._mode = attr_value_pb2.AttrValue(
+          s=compat.as_bytes(mode + _PIPELINE_MODEL_SEQUENTIAL)
       )
-      self._enable = False
-    else:
-      self._enable = enable
   def to_control_flow_context_def(
       self, context_def: Any, export_scope: Any = None
@@ -1637,7 +1644,7 @@ class TPUEmbeddingV2(tpu_embedding_base.TPUEmbeddingBase):
       row_offset: int,
       col_offset: int,
       col_shift: int,
-      vocab_size: int,
+      unused_vocab_size: int,
       num_sc_per_chip: int,
       num_sc_shards: int,
       stacked_table_sample_count: int,