tf-nightly-cpu 2.20.0.dev20250220__cp310-cp310-win_amd64.whl → 2.20.0.dev20250222__cp310-cp310-win_amd64.whl
Sign up to get free protection for your applications and to get access to all the features.
- tensorflow/_api/v2/compat/v1/summary/__init__.py +2 -2
- tensorflow/_api/v2/compat/v1/tpu/experimental/embedding/__init__.py +2 -2
- tensorflow/_api/v2/compat/v2/summary/__init__.py +10 -10
- tensorflow/_api/v2/compat/v2/summary/experimental/__init__.py +4 -4
- tensorflow/_api/v2/compat/v2/tpu/experimental/embedding/__init__.py +2 -2
- tensorflow/_api/v2/summary/__init__.py +10 -10
- tensorflow/_api/v2/summary/experimental/__init__.py +4 -4
- tensorflow/_api/v2/tpu/experimental/embedding/__init__.py +2 -2
- tensorflow/compiler/mlir/stablehlo/stablehlo_extension.pyd +0 -0
- tensorflow/compiler/tf2tensorrt/_pywrap_py_utils.pyd +0 -0
- tensorflow/compiler/tf2xla/ops/_xla_ops.so +0 -0
- tensorflow/include/external/llvm-project/mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h +12 -0
- tensorflow/include/external/llvm-project/mlir/include/mlir/Dialect/Math/IR/MathOps.h.inc +4 -0
- tensorflow/include/external/shardy/shardy/dialect/sdy/transforms/propagation/aggressive_factor_propagation.h +9 -0
- tensorflow/include/external/stablehlo/_virtual_includes/stablehlo_pass_utils/stablehlo/transforms/PassUtils.h +7 -0
- tensorflow/include/external/stablehlo/_virtual_includes/stablehlo_passes/stablehlo/transforms/PassUtils.h +7 -0
- tensorflow/include/external/stablehlo/_virtual_includes/version/stablehlo/dialect/Version.h +1 -1
- tensorflow/include/external/stablehlo/stablehlo/dialect/Version.h +1 -1
- tensorflow/include/external/stablehlo/stablehlo/transforms/PassUtils.h +7 -0
- tensorflow/include/tensorflow/compiler/xla/backends/cpu/codegen/kernel_api_ir_builder.h +3 -2
- tensorflow/include/tensorflow/compiler/xla/backends/cpu/runtime/convolution_thunk_internal.h +8 -10
- tensorflow/include/tensorflow/compiler/xla/backends/cpu/runtime/kernel_thunk.h +9 -3
- tensorflow/include/tensorflow/compiler/xla/backends/cpu/runtime/work_queue.h +81 -19
- tensorflow/include/tensorflow/compiler/xla/codegen/kernel_spec.h +24 -7
- tensorflow/include/tensorflow/compiler/xla/hlo/ir/hlo_casting_utils.h +0 -44
- tensorflow/include/tensorflow/compiler/xla/hlo/ir/hlo_instruction.h +12 -0
- tensorflow/include/tensorflow/compiler/xla/mlir_hlo/_virtual_includes/stablehlo_extension_pass_inc_gen/stablehlo_ext/transforms/passes.h.inc +149 -4
- tensorflow/include/tensorflow/compiler/xla/mlir_hlo/stablehlo_ext/transforms/passes.h.inc +149 -4
- tensorflow/include/tensorflow/compiler/xla/pjrt/distributed/client.h +5 -0
- tensorflow/include/tensorflow/compiler/xla/pjrt/gpu/se_gpu_pjrt_client.h +1 -92
- tensorflow/include/tensorflow/compiler/xla/pjrt/gpu/se_gpu_topology_description.h +126 -0
- tensorflow/include/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_client.h +1 -49
- tensorflow/include/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_device_description.h +75 -0
- tensorflow/include/tensorflow/compiler/xla/pjrt/plugin/xla_cpu/cpu_execute_options.h +57 -0
- tensorflow/include/tensorflow/compiler/xla/pjrt/plugin/xla_cpu/cpu_topology.h +4 -0
- tensorflow/include/tensorflow/compiler/xla/service/constant_value.h +1 -0
- tensorflow/include/tensorflow/compiler/xla/service/hlo_module_util.h +52 -1
- tensorflow/include/tensorflow/compiler/xla/service/hlo_proto_util.h +0 -12
- tensorflow/include/tensorflow/compiler/xla/tsl/concurrency/async_value.h +50 -21
- tensorflow/include/tensorflow/compiler/xla/tsl/framework/convolution/eigen_spatial_convolutions-inl.h +5 -5
- tensorflow/include/tensorflow/core/kernels/data/experimental/random_access_ops.h +0 -2
- tensorflow/include/tensorflow/core/kernels/eigen_attention.h +4 -4
- tensorflow/include/tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h +6 -6
- tensorflow/include/tensorflow/core/kernels/eigen_backward_spatial_convolutions.h +10 -8
- tensorflow/include/tensorflow/core/kernels/eigen_cuboid_convolution.h +6 -6
- tensorflow/include/tensorflow/core/kernels/eigen_pooling.h +12 -12
- tensorflow/include/tensorflow/core/public/release_version.h +39 -0
- tensorflow/include/tensorflow/core/public/version.h +112 -127
- tensorflow/include/tensorflow/python/eager/pywrap_tfe.h +1 -1
- tensorflow/include/xla/backends/cpu/codegen/kernel_api_ir_builder.h +3 -2
- tensorflow/include/xla/backends/cpu/runtime/convolution_thunk_internal.h +8 -10
- tensorflow/include/xla/backends/cpu/runtime/kernel_thunk.h +9 -3
- tensorflow/include/xla/backends/cpu/runtime/work_queue.h +81 -19
- tensorflow/include/xla/codegen/kernel_spec.h +24 -7
- tensorflow/include/xla/hlo/ir/hlo_casting_utils.h +0 -44
- tensorflow/include/xla/hlo/ir/hlo_instruction.h +12 -0
- tensorflow/include/xla/mlir_hlo/_virtual_includes/stablehlo_extension_pass_inc_gen/stablehlo_ext/transforms/passes.h.inc +149 -4
- tensorflow/include/xla/mlir_hlo/stablehlo_ext/transforms/passes.h.inc +149 -4
- tensorflow/include/xla/pjrt/distributed/client.h +5 -0
- tensorflow/include/xla/pjrt/gpu/se_gpu_pjrt_client.h +1 -92
- tensorflow/include/xla/pjrt/gpu/se_gpu_topology_description.h +126 -0
- tensorflow/include/xla/pjrt/pjrt_stream_executor_client.h +1 -49
- tensorflow/include/xla/pjrt/pjrt_stream_executor_device_description.h +75 -0
- tensorflow/include/xla/pjrt/plugin/xla_cpu/cpu_execute_options.h +57 -0
- tensorflow/include/xla/pjrt/plugin/xla_cpu/cpu_topology.h +4 -0
- tensorflow/include/xla/service/constant_value.h +1 -0
- tensorflow/include/xla/service/hlo_module_util.h +52 -1
- tensorflow/include/xla/service/hlo_proto_util.h +0 -12
- tensorflow/include/xla/tsl/concurrency/async_value.h +50 -21
- tensorflow/include/xla/tsl/framework/convolution/eigen_spatial_convolutions-inl.h +5 -5
- tensorflow/lite/experimental/microfrontend/python/ops/_audio_microfrontend_op.so +0 -0
- tensorflow/lite/python/analyzer_wrapper/_pywrap_analyzer_wrapper.pyd +0 -0
- tensorflow/lite/python/interpreter_wrapper/_pywrap_tensorflow_interpreter_wrapper.pyd +0 -0
- tensorflow/lite/python/optimize/_pywrap_tensorflow_lite_calibration_wrapper.pyd +0 -0
- tensorflow/python/_pywrap_dtensor_device.pyd +0 -0
- tensorflow/python/_pywrap_mlir.pyd +0 -0
- tensorflow/python/_pywrap_parallel_device.pyd +0 -0
- tensorflow/python/_pywrap_quantize_training.pyd +0 -0
- tensorflow/python/_pywrap_tensorflow_internal.pyd +0 -0
- tensorflow/python/_pywrap_tfcompile.pyd +0 -0
- tensorflow/python/_pywrap_tfe.pyd +0 -0
- tensorflow/python/client/_pywrap_debug_events_writer.pyd +0 -0
- tensorflow/python/client/_pywrap_device_lib.pyd +0 -0
- tensorflow/python/client/_pywrap_events_writer.pyd +0 -0
- tensorflow/python/client/_pywrap_tf_session.pyd +0 -0
- tensorflow/python/compat/compat.py +1 -1
- tensorflow/python/data/experimental/service/_pywrap_server_lib.pyd +0 -0
- tensorflow/python/eager/imperative_grad.py +5 -5
- tensorflow/python/eager/polymorphic_function/atomic_function.py +1 -1
- tensorflow/python/eager/polymorphic_function/compiler_ir.py +1 -1
- tensorflow/python/eager/polymorphic_function/polymorphic_function.py +45 -41
- tensorflow/python/eager/tape.py +2 -2
- tensorflow/python/framework/_dtypes.pyd +0 -0
- tensorflow/python/framework/_op_def_library_pybind.pyd +0 -0
- tensorflow/python/framework/_op_def_registry.pyd +0 -0
- tensorflow/python/framework/_proto_comparators.pyd +0 -0
- tensorflow/python/framework/_pywrap_python_op_gen.pyd +0 -0
- tensorflow/python/framework/_test_metrics_util.pyd +0 -0
- tensorflow/python/grappler/_pywrap_tf_cluster.pyd +0 -0
- tensorflow/python/grappler/_pywrap_tf_item.pyd +0 -0
- tensorflow/python/grappler/_pywrap_tf_optimizer.pyd +0 -0
- tensorflow/python/lib/core/_pywrap_py_func.pyd +0 -0
- tensorflow/python/lib/io/_pywrap_file_io.pyd +0 -0
- tensorflow/python/lib/io/_pywrap_record_io.pyd +0 -0
- tensorflow/python/ops/summary_ops_v2.py +5 -1
- tensorflow/python/profiler/internal/_pywrap_profiler.pyd +0 -0
- tensorflow/python/profiler/internal/_pywrap_profiler_plugin.pyd +0 -0
- tensorflow/python/saved_model/pywrap_saved_model.pyd +0 -0
- tensorflow/python/tpu/_pywrap_sparse_core_layout.pyd +0 -0
- tensorflow/python/tpu/_pywrap_tpu_embedding.pyd +0 -0
- tensorflow/python/tpu/tpu_embedding_v3.py +14 -7
- tensorflow/python/tpu/tpu_embedding_v3_checkpoint_adapter.py +10 -1
- tensorflow/python/util/_pywrap_checkpoint_reader.pyd +0 -0
- tensorflow/python/util/_pywrap_kernel_registry.pyd +0 -0
- tensorflow/python/util/_pywrap_stat_summarizer.pyd +0 -0
- tensorflow/python/util/_pywrap_tfprof.pyd +0 -0
- tensorflow/python/util/_pywrap_transform_graph.pyd +0 -0
- tensorflow/python/util/_pywrap_utils.pyd +0 -0
- tensorflow/python/util/_tf_stack.pyd +0 -0
- tensorflow/tools/pip_package/setup.py +2 -2
- tensorflow/xla_aot_runtime_src/xla/tsl/concurrency/async_value.cc +26 -51
- {tf_nightly_cpu-2.20.0.dev20250220.dist-info → tf_nightly_cpu-2.20.0.dev20250222.dist-info}/METADATA +1 -1
- {tf_nightly_cpu-2.20.0.dev20250220.dist-info → tf_nightly_cpu-2.20.0.dev20250222.dist-info}/RECORD +126 -121
- tensorflow/include/tensorflow/compiler/xla/backends/cpu/runtime/concurrency.h +0 -77
- tensorflow/include/xla/backends/cpu/runtime/concurrency.h +0 -77
- {tf_nightly_cpu-2.20.0.dev20250220.dist-info → tf_nightly_cpu-2.20.0.dev20250222.dist-info}/WHEEL +0 -0
- {tf_nightly_cpu-2.20.0.dev20250220.dist-info → tf_nightly_cpu-2.20.0.dev20250222.dist-info}/entry_points.txt +0 -0
- {tf_nightly_cpu-2.20.0.dev20250220.dist-info → tf_nightly_cpu-2.20.0.dev20250222.dist-info}/top_level.txt +0 -0
@@ -100,11 +100,12 @@ SpatialConvolutionBackwardInput(
|
|
100
100
|
const DenseIndex row_in_stride = 1, const DenseIndex col_in_stride = 1) {
|
101
101
|
typedef typename internal::traits<OutputBackward>::Index TensorIndex;
|
102
102
|
typedef typename internal::traits<OutputBackward>::Scalar OutScalar;
|
103
|
-
TensorRef<Tensor<typename internal::traits<Kernel>::Scalar,
|
104
|
-
|
105
|
-
|
103
|
+
TensorRef<const Tensor<typename internal::traits<Kernel>::Scalar,
|
104
|
+
internal::traits<Kernel>::NumDimensions,
|
105
|
+
internal::traits<Kernel>::Layout, TensorIndex>>
|
106
106
|
kern(kernel);
|
107
|
-
TensorRef<
|
107
|
+
TensorRef<
|
108
|
+
const Tensor<OutScalar, internal::traits<OutputBackward>::NumDimensions,
|
108
109
|
internal::traits<OutputBackward>::Layout, TensorIndex>>
|
109
110
|
out(output_backward);
|
110
111
|
|
@@ -385,11 +386,12 @@ SpatialConvolutionBackwardKernel(
|
|
385
386
|
const DenseIndex row_in_stride = 1, const DenseIndex col_in_stride = 1) {
|
386
387
|
typedef typename internal::traits<Input>::Index TensorIndex;
|
387
388
|
typedef typename internal::traits<OutputBackward>::Scalar OutScalar;
|
388
|
-
TensorRef<Tensor<typename internal::traits<Input>::Scalar,
|
389
|
-
|
390
|
-
|
389
|
+
TensorRef<const Tensor<typename internal::traits<Input>::Scalar,
|
390
|
+
internal::traits<Input>::NumDimensions,
|
391
|
+
internal::traits<Input>::Layout, TensorIndex>>
|
391
392
|
in(input);
|
392
|
-
TensorRef<
|
393
|
+
TensorRef<
|
394
|
+
const Tensor<OutScalar, internal::traits<OutputBackward>::NumDimensions,
|
393
395
|
internal::traits<OutputBackward>::Layout, TensorIndex>>
|
394
396
|
out(output_backward);
|
395
397
|
|
@@ -1843,13 +1843,13 @@ CuboidConvolution(const Input& input, const Kernel& kernel,
|
|
1843
1843
|
const Index strideCols = 1,
|
1844
1844
|
const PaddingType padding_type = PADDING_SAME) {
|
1845
1845
|
typedef typename internal::traits<Input>::Index TensorIndex;
|
1846
|
-
TensorRef<Tensor<typename internal::traits<Input>::Scalar,
|
1847
|
-
|
1848
|
-
|
1846
|
+
TensorRef<const Tensor<typename internal::traits<Input>::Scalar,
|
1847
|
+
internal::traits<Input>::NumDimensions,
|
1848
|
+
internal::traits<Input>::Layout, TensorIndex> >
|
1849
1849
|
in(input);
|
1850
|
-
TensorRef<Tensor<typename internal::traits<Kernel>::Scalar,
|
1851
|
-
|
1852
|
-
|
1850
|
+
TensorRef<const Tensor<typename internal::traits<Kernel>::Scalar,
|
1851
|
+
internal::traits<Kernel>::NumDimensions,
|
1852
|
+
internal::traits<Kernel>::Layout, TensorIndex> >
|
1853
1853
|
kern(kernel);
|
1854
1854
|
|
1855
1855
|
EIGEN_STATIC_ASSERT(
|
@@ -55,9 +55,9 @@ SpatialMaxPooling(const Input& input, DenseIndex patchRows,
|
|
55
55
|
YOU_MADE_A_PROGRAMMING_MISTAKE);
|
56
56
|
|
57
57
|
typedef typename internal::traits<Input>::Index TensorIndex;
|
58
|
-
TensorRef<Tensor<typename internal::traits<Input>::Scalar,
|
59
|
-
|
60
|
-
|
58
|
+
TensorRef<const Tensor<typename internal::traits<Input>::Scalar,
|
59
|
+
internal::traits<Input>::NumDimensions,
|
60
|
+
internal::traits<Input>::Layout, TensorIndex>>
|
61
61
|
in(input);
|
62
62
|
|
63
63
|
const DenseIndex patchRowsEff =
|
@@ -148,9 +148,9 @@ CuboidMaxPooling(const Input& input, DenseIndex patchPlanes,
|
|
148
148
|
static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
|
149
149
|
|
150
150
|
typedef typename internal::traits<Input>::Index TensorIndex;
|
151
|
-
TensorRef<Tensor<typename internal::traits<Input>::Scalar,
|
152
|
-
|
153
|
-
|
151
|
+
TensorRef<const Tensor<typename internal::traits<Input>::Scalar,
|
152
|
+
internal::traits<Input>::NumDimensions,
|
153
|
+
internal::traits<Input>::Layout, TensorIndex>>
|
154
154
|
in(input);
|
155
155
|
|
156
156
|
static const int idxPlanes = isColMajor ? 1 : 3;
|
@@ -383,9 +383,9 @@ SpatialAvgPooling(const Input& input, DenseIndex patchRows,
|
|
383
383
|
YOU_MADE_A_PROGRAMMING_MISTAKE);
|
384
384
|
|
385
385
|
typedef typename internal::traits<Input>::Index TensorIndex;
|
386
|
-
TensorRef<Tensor<typename internal::traits<Input>::Scalar,
|
387
|
-
|
388
|
-
|
386
|
+
TensorRef<const Tensor<typename internal::traits<Input>::Scalar,
|
387
|
+
internal::traits<Input>::NumDimensions,
|
388
|
+
internal::traits<Input>::Layout, TensorIndex>>
|
389
389
|
in(input);
|
390
390
|
|
391
391
|
const DenseIndex patchRowsEff =
|
@@ -475,9 +475,9 @@ CuboidAvgPooling(const Input& input, DenseIndex patchPlanes,
|
|
475
475
|
static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
|
476
476
|
|
477
477
|
typedef typename internal::traits<Input>::Index TensorIndex;
|
478
|
-
TensorRef<Tensor<typename internal::traits<Input>::Scalar,
|
479
|
-
|
480
|
-
|
478
|
+
TensorRef<const Tensor<typename internal::traits<Input>::Scalar,
|
479
|
+
internal::traits<Input>::NumDimensions,
|
480
|
+
internal::traits<Input>::Layout, TensorIndex>>
|
481
481
|
in(input);
|
482
482
|
|
483
483
|
static const int idxPlanes = isColMajor ? 1 : 3;
|
@@ -0,0 +1,39 @@
|
|
1
|
+
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
#ifndef TENSORFLOW_CORE_PUBLIC_RELEASE_VERSION_H_
|
17
|
+
#define TENSORFLOW_CORE_PUBLIC_RELEASE_VERSION_H_
|
18
|
+
|
19
|
+
// TensorFlow uses semantic versioning, see http://semver.org/.
|
20
|
+
|
21
|
+
// Also update tensorflow/tensorflow.bzl and
|
22
|
+
// tensorflow/tools/pip_package/setup.py
|
23
|
+
#define TF_MAJOR_VERSION 2
|
24
|
+
#define TF_MINOR_VERSION 20
|
25
|
+
#define TF_PATCH_VERSION 0
|
26
|
+
|
27
|
+
// TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
|
28
|
+
// "-beta", "-rc", "-rc.1")
|
29
|
+
#define TF_VERSION_SUFFIX "-dev20250222"
|
30
|
+
|
31
|
+
#define _TF_STR_HELPER(x) #x
|
32
|
+
#define _TF_STR(x) _TF_STR_HELPER(x)
|
33
|
+
|
34
|
+
// e.g. "0.5.0" or "0.6.0-alpha".
|
35
|
+
#define TF_VERSION_STRING \
|
36
|
+
(_TF_STR(TF_MAJOR_VERSION) "." _TF_STR(TF_MINOR_VERSION) "." _TF_STR( \
|
37
|
+
TF_PATCH_VERSION) TF_VERSION_SUFFIX)
|
38
|
+
|
39
|
+
#endif // TENSORFLOW_CORE_PUBLIC_RELEASE_VERSION_H_
|
@@ -1,127 +1,112 @@
|
|
1
|
-
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
2
|
-
|
3
|
-
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
you may not use this file except in compliance with the License.
|
5
|
-
You may obtain a copy of the License at
|
6
|
-
|
7
|
-
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
|
9
|
-
Unless required by applicable law or agreed to in writing, software
|
10
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
See the License for the specific language governing permissions and
|
13
|
-
limitations under the License.
|
14
|
-
==============================================================================*/
|
15
|
-
|
16
|
-
#ifndef TENSORFLOW_CORE_PUBLIC_VERSION_H_
|
17
|
-
#define TENSORFLOW_CORE_PUBLIC_VERSION_H_
|
18
|
-
|
19
|
-
// TensorFlow uses semantic versioning, see http://semver.org/.
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
//
|
28
|
-
//
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
//
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
//
|
40
|
-
//
|
41
|
-
//
|
42
|
-
//
|
43
|
-
//
|
44
|
-
//
|
45
|
-
//
|
46
|
-
//
|
47
|
-
//
|
48
|
-
//
|
49
|
-
//
|
50
|
-
//
|
51
|
-
//
|
52
|
-
//
|
53
|
-
//
|
54
|
-
//
|
55
|
-
//
|
56
|
-
//
|
57
|
-
//
|
58
|
-
//
|
59
|
-
//
|
60
|
-
//
|
61
|
-
//
|
62
|
-
//
|
63
|
-
//
|
64
|
-
//
|
65
|
-
//
|
66
|
-
//
|
67
|
-
//
|
68
|
-
//
|
69
|
-
//
|
70
|
-
//
|
71
|
-
//
|
72
|
-
//
|
73
|
-
//
|
74
|
-
//
|
75
|
-
//
|
76
|
-
//
|
77
|
-
//
|
78
|
-
//
|
79
|
-
//
|
80
|
-
//
|
81
|
-
//
|
82
|
-
//
|
83
|
-
//
|
84
|
-
//
|
85
|
-
//
|
86
|
-
//
|
87
|
-
//
|
88
|
-
//
|
89
|
-
//
|
90
|
-
//
|
91
|
-
//
|
92
|
-
//
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
//
|
97
|
-
|
98
|
-
//
|
99
|
-
//
|
100
|
-
//
|
101
|
-
//
|
102
|
-
//
|
103
|
-
//
|
104
|
-
//
|
105
|
-
//
|
106
|
-
//
|
107
|
-
//
|
108
|
-
|
109
|
-
#define
|
110
|
-
#define
|
111
|
-
|
112
|
-
|
113
|
-
// Checkpoint compatibility versions (the versions field in SavedSliceMeta).
|
114
|
-
//
|
115
|
-
// The checkpoint versions have the same semantics as GraphDef versions, but the
|
116
|
-
// numbering scheme is separate. We have no plans to ever deprecate checkpoint
|
117
|
-
// versions, but it's good to have this in place in case we ever need to.
|
118
|
-
//
|
119
|
-
// Version history:
|
120
|
-
//
|
121
|
-
// 0. Checkpoints saved before checkpoint versioning.
|
122
|
-
// 1. First real version (10feb2015).
|
123
|
-
#define TF_CHECKPOINT_VERSION_MIN_PRODUCER 0
|
124
|
-
#define TF_CHECKPOINT_VERSION_MIN_CONSUMER 0
|
125
|
-
#define TF_CHECKPOINT_VERSION 1
|
126
|
-
|
127
|
-
#endif // TENSORFLOW_CORE_PUBLIC_VERSION_H_
|
1
|
+
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
==============================================================================*/
|
15
|
+
|
16
|
+
#ifndef TENSORFLOW_CORE_PUBLIC_VERSION_H_
|
17
|
+
#define TENSORFLOW_CORE_PUBLIC_VERSION_H_
|
18
|
+
|
19
|
+
// TensorFlow uses semantic versioning, see http://semver.org/.
|
20
|
+
|
21
|
+
#define TF_STR_HELPER(x) #x
|
22
|
+
#define TF_STR(x) TF_STR_HELPER(x)
|
23
|
+
|
24
|
+
// GraphDef compatibility versions (the versions field in graph.proto).
|
25
|
+
//
|
26
|
+
// Each graph has producer and min_consumer versions, and each
|
27
|
+
// consumer has its own version and a min_producer. In addition, graphs can
|
28
|
+
// mark specific consumer versions as bad (to prevent bugs from executing).
|
29
|
+
// A consumer will execute a graph if the consumer's version is at least the
|
30
|
+
// graph's min_consumer, the graph's producer version is at least the consumer's
|
31
|
+
// min_producer, and the consumer version isn't specifically disallowed by the
|
32
|
+
// graph.
|
33
|
+
//
|
34
|
+
// By default, newly created graphs have producer version TF_GRAPH_DEF_VERSION,
|
35
|
+
// min_consumer TF_GRAPH_DEF_VERSION_MIN_CONSUMER, and no other bad consumer versions.
|
36
|
+
//
|
37
|
+
// Version history:
|
38
|
+
//
|
39
|
+
// 0. Graphs created before GraphDef versioning
|
40
|
+
// 1. First real version (2dec2015)
|
41
|
+
// 2. adjust_contrast only takes float, doesn't perform clamping (11dec2015)
|
42
|
+
// 3. Remove TileGrad, since it was equivalent to reduce_sum (30dec2015)
|
43
|
+
// 4. When support for this version is removed, we can safely make AttrValue
|
44
|
+
// parsing more strict with respect to empty list values (see
|
45
|
+
// 111635679, 7jan2016).
|
46
|
+
// 5. Graphs are wholly-validated during Session::Create() (7jan2016).
|
47
|
+
// 6. TensorFlow is scalar strict within Google (27jan2016).
|
48
|
+
// 7. Remove TopK in favor of TopKV2 (5feb2016).
|
49
|
+
// 8. Replace RandomCrop from C++ with pure Python (5feb2016).
|
50
|
+
// 9. Deprecate batch_norm_with_global_normalization (16feb2016).
|
51
|
+
// 10. Deprecate conv3d_backprop_{filter,input} (10jun2016).
|
52
|
+
// 11. Deprecate {batch}_self_adjoint_eig (3aug2016).
|
53
|
+
// 12. Graph consumers understand the node_def field of FunctionDef (22aug2016).
|
54
|
+
// 13. Deprecate multiple batch linear algebra ops (9sep2016).
|
55
|
+
// 14. Deprecate batch_matrix_* ops. (10sep2016).
|
56
|
+
// 15. Deprecate batch_fft_* ops. (14sep2016).
|
57
|
+
// 16. Deprecate tensor_array (v1) ops in favor of v2 (10nov2016).
|
58
|
+
// 17. Deprecate inv (11nov2016).
|
59
|
+
// 17. Expose reverse_v2 (10nov2016)
|
60
|
+
// 18. Add VariableV2 (30nov2016)
|
61
|
+
// 19. Deprecated ops created by models moved out of core SkipGram, NegTrain.
|
62
|
+
// (08dec2016)
|
63
|
+
// 20. Catch all version 1.0 changes to Python API generation. SplitV is now
|
64
|
+
// used for tf.split, ReverseV2 is now used by tf.reverse, ConcatV2 is
|
65
|
+
// now used by tf.concat. Graphs use flooring
|
66
|
+
// division and mod semantics. TensorArrayV3. (12dec2016)
|
67
|
+
// Also considered the version for when it is required for reduction
|
68
|
+
// ops' indices to be scalar or vector, and not higher rank.
|
69
|
+
// Some earlier graph def versions allowed this.
|
70
|
+
// 21. Dropped FunctionDef.Node support, switched to node_def introduced
|
71
|
+
// in version 12. (11jan2017)
|
72
|
+
// 22. Placeholder now can specify and enforce scalar and partial
|
73
|
+
// shapes, particularly when restoring a graph from GraphDef
|
74
|
+
// produced at version 22 or later. (04/10/2016)
|
75
|
+
// 23. Remove NonMaxSuppression in favor of NonMaxSuppressionV2.
|
76
|
+
// 24. Deprecate lookup ops (v1) ops in favor of v2 (30may2017)
|
77
|
+
// 25. Deprecate stack (v1) ops in favor of v2 (2017/6/15).
|
78
|
+
// 25. Deprecate RandomPoisson (v1) ops in favor of v2 (2017/10/25).
|
79
|
+
// 26. Add a bool 'stripped_default_attrs' to MetaInfoDef indicating
|
80
|
+
// whether default-valued attrs have been stripped from the nodes in the
|
81
|
+
// GraphDef. (7dec2017)
|
82
|
+
// 27. Deprecate TensorArray ops v2 in favor of v3, and deprecate io_ops
|
83
|
+
// in favor of V2 ops. (2018/01/23)
|
84
|
+
// 28. Deprecate MatrixExponential op in favor of Python implementation.
|
85
|
+
// (2018/08/21).
|
86
|
+
// (2019/02/15). Added `control_ret` field to FunctionDef proto, and
|
87
|
+
// `control_output` field to OpDef proto.
|
88
|
+
// 29. Deprecate StatefulStandardNormal op in favor of StatefulStandardNormalV2.
|
89
|
+
// (2019/03/25).
|
90
|
+
// (2019/04/17). Added `arg_attr` field to FunctionDefProto.
|
91
|
+
// 30. (2019/05/09) First date based GraphDef version. GraphDef
|
92
|
+
// versions advance by 1 each day after this point.
|
93
|
+
|
94
|
+
#define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
|
95
|
+
#define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
|
96
|
+
#define TF_GRAPH_DEF_VERSION 2145 // Updated: 2025/2/21
|
97
|
+
|
98
|
+
// Checkpoint compatibility versions (the versions field in SavedSliceMeta).
|
99
|
+
//
|
100
|
+
// The checkpoint versions have the same semantics as GraphDef versions, but the
|
101
|
+
// numbering scheme is separate. We have no plans to ever deprecate checkpoint
|
102
|
+
// versions, but it's good to have this in place in case we ever need to.
|
103
|
+
//
|
104
|
+
// Version history:
|
105
|
+
//
|
106
|
+
// 0. Checkpoints saved before checkpoint versioning.
|
107
|
+
// 1. First real version (10feb2015).
|
108
|
+
#define TF_CHECKPOINT_VERSION_MIN_PRODUCER 0
|
109
|
+
#define TF_CHECKPOINT_VERSION_MIN_CONSUMER 0
|
110
|
+
#define TF_CHECKPOINT_VERSION 1
|
111
|
+
|
112
|
+
#endif // TENSORFLOW_CORE_PUBLIC_VERSION_H_
|
@@ -443,7 +443,7 @@ EagerContextThreadLocalData* GetEagerContextThreadLocalData(
|
|
443
443
|
// wish to destroy thread-local state associated with a single py_eager_context
|
444
444
|
// for multiple threads, then you must call this method from each thread.
|
445
445
|
//
|
446
|
-
// Thread-local state
|
446
|
+
// Thread-local state associated with eager contexts is also automatically
|
447
447
|
// cleaned up when the thread is destroyed.
|
448
448
|
//
|
449
449
|
// This function assumes that the Python GIL is held (and does not perform its
|
@@ -89,9 +89,10 @@ class KernelApiIrBuilder {
|
|
89
89
|
// read-only if it is not aliased with any result.
|
90
90
|
absl::flat_hash_set<int64_t> invariant_arguments;
|
91
91
|
|
92
|
-
//
|
92
|
+
// The set of buffers used by this kernel, can be empty if buffer assignment
|
93
93
|
// was not provided.
|
94
|
-
absl::InlinedVector<
|
94
|
+
absl::InlinedVector<BufferAllocation::Slice, 8> argument_buffers;
|
95
|
+
absl::InlinedVector<BufferAllocation::Slice, 8> result_buffers;
|
95
96
|
};
|
96
97
|
|
97
98
|
KernelApiIrBuilder(llvm::LLVMContext& context, Options options);
|
@@ -22,7 +22,7 @@ limitations under the License.
|
|
22
22
|
#include <memory>
|
23
23
|
#include <utility>
|
24
24
|
|
25
|
-
#include "xla/backends/cpu/runtime/
|
25
|
+
#include "xla/backends/cpu/runtime/work_queue.h"
|
26
26
|
#include "xla/tsl/concurrency/async_value_ref.h"
|
27
27
|
#include "xla/tsl/concurrency/chain.h"
|
28
28
|
#include "xla/tsl/framework/convolution/eigen_spatial_convolutions.h" // IWYU pragma: keep
|
@@ -30,7 +30,6 @@ limitations under the License.
|
|
30
30
|
|
31
31
|
#define EIGEN_USE_THREADS
|
32
32
|
#include "Eigen/Core"
|
33
|
-
#include "Eigen/ThreadPool"
|
34
33
|
#include "unsupported/Eigen/CXX11/Tensor"
|
35
34
|
|
36
35
|
namespace xla::cpu::internal {
|
@@ -384,8 +383,9 @@ void EigenGenericConv2D(
|
|
384
383
|
auto num_tasks = Eigen::numext::div_ceil(feature_group_count, task_size);
|
385
384
|
|
386
385
|
if (use_thunk_runtime) {
|
387
|
-
|
388
|
-
&device, num_tasks,
|
386
|
+
Worker::Parallelize(
|
387
|
+
&device, /*num_workers=*/num_tasks, num_tasks,
|
388
|
+
[=, &device](Eigen::Index task_index) mutable {
|
389
389
|
Eigen::Index start = task_index * task_size;
|
390
390
|
Eigen::Index end = std::min(start + task_size, feature_group_count);
|
391
391
|
for (Eigen::Index i = start; i < end; ++i) {
|
@@ -395,18 +395,16 @@ void EigenGenericConv2D(
|
|
395
395
|
}
|
396
396
|
});
|
397
397
|
} else {
|
398
|
-
|
399
|
-
|
400
|
-
|
398
|
+
tsl::BlockUntilReady(Worker::Parallelize(
|
399
|
+
&device, /*num_workers=*/num_tasks, num_tasks,
|
400
|
+
[=, &device](Eigen::Index task_index) {
|
401
401
|
Eigen::Index start = task_index * task_size;
|
402
402
|
Eigen::Index end = std::min(start + task_size, feature_group_count);
|
403
403
|
for (Eigen::Index i = start; i < end; ++i) {
|
404
404
|
auto [output, convolved] = convolve_group(i);
|
405
405
|
output.device(device) = convolved;
|
406
406
|
}
|
407
|
-
|
408
|
-
});
|
409
|
-
barrier.Wait();
|
407
|
+
}));
|
410
408
|
}
|
411
409
|
|
412
410
|
} else {
|
@@ -63,6 +63,8 @@ class KernelThunkBase : public Thunk {
|
|
63
63
|
const = 0;
|
64
64
|
|
65
65
|
virtual absl::Span<const BufferAllocation::Slice> results_buffers() const = 0;
|
66
|
+
|
67
|
+
virtual const absl::flat_hash_set<int64_t>& invariant_arguments() const = 0;
|
66
68
|
};
|
67
69
|
|
68
70
|
namespace internal {
|
@@ -95,6 +97,10 @@ class KernelThunk : public KernelThunkBase {
|
|
95
97
|
return absl::MakeSpan(results_buffers_);
|
96
98
|
}
|
97
99
|
|
100
|
+
const absl::flat_hash_set<int64_t>& invariant_arguments() const final {
|
101
|
+
return invariant_arguments_;
|
102
|
+
}
|
103
|
+
|
98
104
|
protected:
|
99
105
|
tsl::AsyncValueRef<ExecuteEvent> ExecuteInternal(const ExecuteParams& params);
|
100
106
|
|
@@ -129,7 +135,7 @@ class KernelThunk : public KernelThunkBase {
|
|
129
135
|
KernelThunk(Info info,
|
130
136
|
absl::Span<const BufferAllocation::Slice> arguments_buffers,
|
131
137
|
absl::Span<const BufferAllocation::Slice> results_buffers,
|
132
|
-
|
138
|
+
absl::flat_hash_set<int64_t> invariant_arguments,
|
133
139
|
std::string kernel_name, se::ThreadDim thread_dim,
|
134
140
|
std::optional<uint64_t> min_alignment);
|
135
141
|
|
@@ -139,7 +145,7 @@ class KernelThunk : public KernelThunkBase {
|
|
139
145
|
ResultsBuffers results_buffers_;
|
140
146
|
|
141
147
|
// A set of invariant arguments (their indices).
|
142
|
-
|
148
|
+
absl::flat_hash_set<int64_t> invariant_arguments_;
|
143
149
|
|
144
150
|
size_t num_kernel_args_;
|
145
151
|
|
@@ -189,7 +195,7 @@ class KernelThunk final : public internal::KernelThunk<> {
|
|
189
195
|
absl::Span<const BufferAllocation::Slice> arguments_buffers,
|
190
196
|
absl::Span<const BufferAllocation::Slice> results_buffers,
|
191
197
|
std::string kernel_name, se::ThreadDim thread_dim,
|
192
|
-
|
198
|
+
absl::flat_hash_set<int64_t> invariant_arguments,
|
193
199
|
std::optional<uint64_t> min_alignment = std::nullopt);
|
194
200
|
|
195
201
|
static absl::StatusOr<std::unique_ptr<Thunk>> Create(
|