tinygrad 0.10.1.tar.gz → 0.10.2.tar.gz
This diff shows the changes between the publicly released tinygrad 0.10.1 and 0.10.2 package versions as they appear in their public registry. It is provided for informational purposes only.
- {tinygrad-0.10.1 → tinygrad-0.10.2}/PKG-INFO +20 -8
- {tinygrad-0.10.1 → tinygrad-0.10.2}/README.md +2 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/setup.py +19 -12
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_arange.py +6 -9
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_assign.py +1 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_const_folding.py +43 -8
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_conv_shapetracker.py +1 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_copy_speed.py +5 -5
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_device_speed.py +1 -1
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_dtype.py +18 -3
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_dtype_alu.py +1 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_fuzz_shape_ops.py +1 -1
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_jit.py +5 -5
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_kernel_cache.py +4 -4
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_linearizer.py +77 -47
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_linearizer_failures.py +6 -6
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_linearizer_overflows.py +2 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_multitensor.py +5 -1
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_nn.py +8 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_ops.py +118 -36
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_pickle.py +4 -4
- tinygrad-0.10.2/test/test_quantize_onnx.py +212 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_randomness.py +2 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_renderer_failures.py +6 -5
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_sample.py +2 -1
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_schedule.py +125 -29
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_search.py +32 -1
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_speed_v_torch.py +10 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_subbuffer.py +1 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_tensor.py +11 -5
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_tensor_uop.py +4 -4
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_tiny.py +2 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_transcendental.py +15 -4
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_uop_graph.py +64 -5
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_uops.py +19 -33
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_uops_stats.py +3 -3
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_zero_copy.py +1 -1
- tinygrad-0.10.2/tinygrad/codegen/devectorizer.py +247 -0
- tinygrad-0.10.2/tinygrad/codegen/expander.py +121 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/codegen/kernel.py +35 -37
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/codegen/linearize.py +19 -10
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/codegen/lowerer.py +31 -8
- tinygrad-0.10.2/tinygrad/codegen/symbolic.py +476 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/codegen/transcendental.py +10 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/device.py +28 -11
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/dtype.py +12 -3
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/jit.py +3 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/multi.py +0 -1
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/realize.py +7 -4
- tinygrad-0.10.2/tinygrad/engine/schedule.py +458 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/search.py +20 -27
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/gradient.py +3 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/helpers.py +7 -4
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/nn/state.py +2 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/ops.py +64 -329
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/renderer/__init__.py +19 -3
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/renderer/cstyle.py +39 -18
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/renderer/llvmir.py +55 -18
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/renderer/ptx.py +6 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/renderer/wgsl.py +20 -12
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/libc.py +404 -71
- tinygrad-0.10.1/tinygrad/runtime/autogen/libpciaccess.py → tinygrad-0.10.2/tinygrad/runtime/autogen/pci.py +25 -715
- tinygrad-0.10.2/tinygrad/runtime/autogen/webgpu.py +6985 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/graph/metal.py +28 -29
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_amd.py +37 -34
- tinygrad-0.10.1/tinygrad/runtime/ops_clang.py → tinygrad-0.10.2/tinygrad/runtime/ops_cpu.py +4 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_disk.py +1 -1
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_dsp.py +59 -33
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_llvm.py +14 -12
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_metal.py +78 -62
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_nv.py +9 -6
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_python.py +5 -5
- tinygrad-0.10.2/tinygrad/runtime/ops_webgpu.py +225 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/am/amdev.py +23 -11
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/am/ip.py +10 -10
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/elf.py +2 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/hcq.py +7 -5
- tinygrad-0.10.2/tinygrad/runtime/support/llvm.py +26 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/shape/shapetracker.py +3 -2
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/shape/view.py +2 -3
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/spec.py +21 -20
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/tensor.py +150 -90
- tinygrad-0.10.2/tinygrad/viz/assets/cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/highlight.min.js +1232 -0
- tinygrad-0.10.2/tinygrad/viz/assets/cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/languages/cpp.min.js +47 -0
- tinygrad-0.10.2/tinygrad/viz/assets/cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/languages/python.min.js +42 -0
- tinygrad-0.10.2/tinygrad/viz/assets/cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/styles/default.min.css +9 -0
- tinygrad-0.10.2/tinygrad/viz/assets/d3js.org/d3.v5.min.js +2 -0
- tinygrad-0.10.2/tinygrad/viz/assets/dagrejs.github.io/project/dagre-d3/latest/dagre-d3.min.js +4816 -0
- tinygrad-0.10.2/tinygrad/viz/assets/unpkg.com/@highlightjs/cdn-assets@11.10.0/styles/tokyo-night-dark.min.css +8 -0
- tinygrad-0.10.2/tinygrad/viz/index.html +544 -0
- tinygrad-0.10.2/tinygrad/viz/perfetto.html +178 -0
- tinygrad-0.10.2/tinygrad/viz/serve.py +205 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad.egg-info/PKG-INFO +20 -8
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad.egg-info/SOURCES.txt +18 -4
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad.egg-info/requires.txt +19 -6
- tinygrad-0.10.1/tinygrad/codegen/rewriter.py +0 -516
- tinygrad-0.10.1/tinygrad/engine/schedule.py +0 -486
- tinygrad-0.10.1/tinygrad/runtime/ops_webgpu.py +0 -63
- tinygrad-0.10.1/tinygrad/runtime/support/llvm.py +0 -32
- {tinygrad-0.10.1 → tinygrad-0.10.2}/LICENSE +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/setup.cfg +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_compile_failures.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_conv.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_fusion_op.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_gc.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_graph.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_hcq.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_image_dtype.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_linearizer_dumb.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_masked_st.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_metal.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_method_cache.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_net_speed.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_ocl.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_optim.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_profiler.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_rearrange_einops.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_setitem.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_specific_conv.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_symbolic_jit.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_symbolic_ops.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_symbolic_shapetracker.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_tensor_data.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_tensor_variable.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_to_numpy.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_winograd.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/codegen/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/memory.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/nn/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/nn/datasets.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/nn/optim.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/py.typed +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/adreno.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/amd_gpu.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/comgr.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/cuda.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/hip.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/hsa.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/io_uring.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/kfd.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/kgsl.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/llvm.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/nv_gpu.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/nvrtc.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/opencl.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/qcom_dsp.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/vfio.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/graph/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/graph/cuda.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/graph/hcq.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_cloud.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_cuda.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_gpu.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_hip.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_npy.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_qcom.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/allocator.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/am/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/compiler_cuda.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/compiler_hip.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/shape/__init__.py +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad.egg-info/dependency_links.txt +0 -0
- {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad.egg-info/top_level.txt +0 -0
{tinygrad-0.10.1 → tinygrad-0.10.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: tinygrad
-Version: 0.10.1
+Version: 0.10.2
 Summary: You like pytorch? You like micrograd? You love tinygrad! <3
 Author: George Hotz
 License: MIT

@@ -20,13 +20,28 @@ Requires-Dist: typing-extensions; extra == "linting"
 Requires-Dist: pre-commit; extra == "linting"
 Requires-Dist: ruff; extra == "linting"
 Requires-Dist: types-tqdm; extra == "linting"
+Provides-Extra: testing-minimal
+Requires-Dist: numpy; extra == "testing-minimal"
+Requires-Dist: torch; extra == "testing-minimal"
+Requires-Dist: pytest; extra == "testing-minimal"
+Requires-Dist: pytest-xdist; extra == "testing-minimal"
+Requires-Dist: hypothesis; extra == "testing-minimal"
+Provides-Extra: testing-unit
+Requires-Dist: numpy; extra == "testing-unit"
+Requires-Dist: torch; extra == "testing-unit"
+Requires-Dist: pytest; extra == "testing-unit"
+Requires-Dist: pytest-xdist; extra == "testing-unit"
+Requires-Dist: hypothesis; extra == "testing-unit"
+Requires-Dist: tqdm; extra == "testing-unit"
+Requires-Dist: safetensors; extra == "testing-unit"
+Requires-Dist: tabulate; extra == "testing-unit"
 Provides-Extra: testing
 Requires-Dist: numpy; extra == "testing"
 Requires-Dist: torch; extra == "testing"
-Requires-Dist: jax; extra == "testing"
-Requires-Dist: pillow; extra == "testing"
 Requires-Dist: pytest; extra == "testing"
 Requires-Dist: pytest-xdist; extra == "testing"
+Requires-Dist: hypothesis; extra == "testing"
+Requires-Dist: pillow; extra == "testing"
 Requires-Dist: onnx==1.16.0; extra == "testing"
 Requires-Dist: onnx2torch; extra == "testing"
 Requires-Dist: opencv-python; extra == "testing"

@@ -39,13 +54,10 @@ Requires-Dist: tiktoken; extra == "testing"
 Requires-Dist: blobfile; extra == "testing"
 Requires-Dist: librosa; extra == "testing"
 Requires-Dist: networkx; extra == "testing"
-Requires-Dist: hypothesis; extra == "testing"
 Requires-Dist: nibabel; extra == "testing"
 Requires-Dist: bottle; extra == "testing"
 Requires-Dist: ggml-python; extra == "testing"
 Requires-Dist: capstone; extra == "testing"
-Provides-Extra: webgpu
-Requires-Dist: wgpu; extra == "webgpu"
 Provides-Extra: docs
 Requires-Dist: mkdocs; extra == "docs"
 Requires-Dist: mkdocs-material; extra == "docs"

@@ -149,7 +161,7 @@ See [examples/beautiful_mnist.py](examples/beautiful_mnist.py) for the full vers
 tinygrad already supports numerous accelerators, including:

 - [x] [GPU (OpenCL)](tinygrad/runtime/ops_gpu.py)
-- [x] [
+- [x] [CPU (C Code)](tinygrad/runtime/ops_cpu.py)
 - [x] [LLVM](tinygrad/runtime/ops_llvm.py)
 - [x] [METAL](tinygrad/runtime/ops_metal.py)
 - [x] [CUDA](tinygrad/runtime/ops_cuda.py)

@@ -219,7 +231,7 @@ We'll start with what will get your PR closed with a pointer to this section:

 - No code golf! While low line count is a guiding light of this project, anything that remotely looks like code golf will be closed. The true goal is reducing complexity and increasing readability, and deleting `\n`s does nothing to help with that.
 - All docs and whitespace changes will be closed unless you are a well-known contributor. The people writing the docs should be those who know the codebase the absolute best. People who have not demonstrated that shouldn't be messing with docs. Whitespace changes are both useless *and* carry a risk of introducing bugs.
-- Anything you claim is a "speedup" must be benchmarked. In general, the goal is simplicity, so even if your PR makes things marginally faster, you have to consider the tradeoff with
+- Anything you claim is a "speedup" must be benchmarked. In general, the goal is simplicity, so even if your PR makes things marginally faster, you have to consider the tradeoff with maintainability and readability.
 - In general, the code outside the core `tinygrad/` folder is not well tested, so unless the current code there is broken, you shouldn't be changing it.
 - If your PR looks "complex", is a big diff, or adds lots of lines, it won't be reviewed or merged. Consider breaking it up into smaller PRs that are individually clear wins. A common pattern I see is prerequisite refactors before adding new functionality. If you can (cleanly) refactor to the point that the feature is a 3 line change, this is great, and something easy for us to review.
{tinygrad-0.10.1 → tinygrad-0.10.2}/README.md

@@ -81,7 +81,7 @@ See [examples/beautiful_mnist.py](examples/beautiful_mnist.py) for the full vers
 tinygrad already supports numerous accelerators, including:

 - [x] [GPU (OpenCL)](tinygrad/runtime/ops_gpu.py)
-- [x] [
+- [x] [CPU (C Code)](tinygrad/runtime/ops_cpu.py)
 - [x] [LLVM](tinygrad/runtime/ops_llvm.py)
 - [x] [METAL](tinygrad/runtime/ops_metal.py)
 - [x] [CUDA](tinygrad/runtime/ops_cuda.py)

@@ -151,7 +151,7 @@ We'll start with what will get your PR closed with a pointer to this section:

 - No code golf! While low line count is a guiding light of this project, anything that remotely looks like code golf will be closed. The true goal is reducing complexity and increasing readability, and deleting `\n`s does nothing to help with that.
 - All docs and whitespace changes will be closed unless you are a well-known contributor. The people writing the docs should be those who know the codebase the absolute best. People who have not demonstrated that shouldn't be messing with docs. Whitespace changes are both useless *and* carry a risk of introducing bugs.
-- Anything you claim is a "speedup" must be benchmarked. In general, the goal is simplicity, so even if your PR makes things marginally faster, you have to consider the tradeoff with
+- Anything you claim is a "speedup" must be benchmarked. In general, the goal is simplicity, so even if your PR makes things marginally faster, you have to consider the tradeoff with maintainability and readability.
 - In general, the code outside the core `tinygrad/` folder is not well tested, so unless the current code there is broken, you shouldn't be changing it.
 - If your PR looks "complex", is a big diff, or adds lots of lines, it won't be reviewed or merged. Consider breaking it up into smaller PRs that are individually clear wins. A common pattern I see is prerequisite refactors before adding new functionality. If you can (cleanly) refactor to the point that the feature is a 3 line change, this is great, and something easy for us to review.
{tinygrad-0.10.1 → tinygrad-0.10.2}/setup.py

@@ -7,16 +7,24 @@ directory = Path(__file__).resolve().parent
 with open(directory / 'README.md', encoding='utf-8') as f:
   long_description = f.read()

+testing_minimal = [
+  "numpy",
+  "torch",
+  "pytest",
+  "pytest-xdist",
+  "hypothesis",
+]
+
 setup(name='tinygrad',
-      version='0.10.1',
+      version='0.10.2',
       description='You like pytorch? You like micrograd? You love tinygrad! <3',
       author='George Hotz',
       license='MIT',
       long_description=long_description,
       long_description_content_type='text/markdown',
-      packages = ['tinygrad', 'tinygrad.runtime.autogen', 'tinygrad.codegen', 'tinygrad.nn', 'tinygrad.renderer', 'tinygrad.engine',
+      packages = ['tinygrad', 'tinygrad.runtime.autogen', 'tinygrad.codegen', 'tinygrad.nn', 'tinygrad.renderer', 'tinygrad.engine', 'tinygrad.viz',
                   'tinygrad.runtime', 'tinygrad.runtime.support', 'tinygrad.runtime.support.am', 'tinygrad.runtime.graph', 'tinygrad.shape'],
-      package_data = {'tinygrad': ['py.typed']},
+      package_data = {'tinygrad': ['py.typed'], 'tinygrad.viz': ['index.html', 'perfetto.html', 'assets/**/*']},
       classifiers=[
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: MIT License"

@@ -35,13 +43,14 @@ setup(name='tinygrad',
         "types-tqdm",
       ],
       #'mlperf': ["mlperf-logging @ git+https://github.com/mlperf/logging.git@4.1.0-rc3"],
-
-
-
-
+      'testing_minimal': testing_minimal,
+      'testing_unit': testing_minimal + [
+        "tqdm",
+        "safetensors",
+        "tabulate" # for sz.py
+      ],
+      'testing': testing_minimal + [
         "pillow",
-        "pytest",
-        "pytest-xdist",
         "onnx==1.16.0",
         "onnx2torch",
         "opencv-python",

@@ -54,13 +63,11 @@ setup(name='tinygrad',
         "blobfile",
         "librosa",
         "networkx",
-        "hypothesis",
         "nibabel",
         "bottle",
         "ggml-python",
         "capstone"
       ],
-      'webgpu': ["wgpu"],
       'docs': [
         "mkdocs",
         "mkdocs-material",

@@ -73,6 +80,6 @@ setup(name='tinygrad',
       'testing_tf': [
         "tensorflow==2.15.1",
         "tensorflow_addons",
-      ]
+      ],
       },
       include_package_data=True)
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_arange.py

@@ -66,20 +66,17 @@ class TestArange(unittest.TestCase):
     return self.test_all_opts([Opt(OptOps.UPCAST, 0, 4), Opt(OptOps.UNROLL, 0, 4)], [Opt(op=OptOps.GROUP, axis=0, arg=0)])

 class TestIndexing(unittest.TestCase):
-  # update: passing after CAST_BEFORE_VIEW=1 deletion
-  # @unittest.expectedFailure
   def test_arange_2_reduce(self):
     needle = Tensor.zeros(16384, dtype=dtypes.int).contiguous()
     needle[1337] = 1
     needle.realize()
     with Context(NOOPT=1, FUSE_ARANGE=1):
       GlobalCounters.reset()
-
-      out = ((Tensor.arange(1,16385).reshape(16384,1)-1)*needle.reshape(16384,1)).sum()
+      out = ((Tensor.arange(1,16385)-1)*needle).sum()
       sched = out.schedule()
-
+      self.assertEqual(len(sched), 1)
       run_schedule(sched)
-
+      self.assertEqual(out.item(), 1337)

   @unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
   def test_manual_index(self):

@@ -95,7 +92,7 @@ class TestIndexing(unittest.TestCase):
     full = (rng==idxs).where(reshape_dataset, Tensor.zeros(4, 256, 16384, 1))
     X = full.sum(axis=(2,3))
     sched = X.schedule()
-
+    self.assertEqual(len(sched), 1)
     run_schedule(sched)
     assert GlobalCounters.global_ops < 4*16384, f"too many ops {GlobalCounters.global_ops}"
     np.testing.assert_allclose(real_index, X.numpy())

@@ -111,7 +108,7 @@ class TestIndexing(unittest.TestCase):
     assert X.shape == (4,256)
     sched = X.schedule()
     # TODO: enable these asserts when the scheduler can handle this
-    #
+    #self.assertEqual(len(sched), 1)
     run_schedule(sched)
     #assert GlobalCounters.global_ops < 4*16384, f"too many ops {GlobalCounters.global_ops}"
     np.testing.assert_allclose(real_index, X.numpy())

@@ -126,7 +123,7 @@ class TestIndexing(unittest.TestCase):
     X = dataset[idxs]
     assert X.shape == (4,256)
     sched = X.schedule()
-
+    self.assertEqual(len(sched), 2)
     run_schedule(sched)
     assert GlobalCounters.global_ops < 4*16384, f"too many ops {GlobalCounters.global_ops} != {4*16384}"
     np.testing.assert_allclose(real_index, X.numpy())
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_assign.py

@@ -203,6 +203,7 @@ class TestAssign(unittest.TestCase):
     np.testing.assert_equal(b0.numpy(), 128)
     np.testing.assert_equal(b1.numpy(), 608)

+  @unittest.skip("TODO: bring this assert back")
   def test_crossunder_assign(self):
     # NOTE: should *not* raise AssertionError from numpy
     with self.assertRaisesRegex(RuntimeError, "cycle"):
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_const_folding.py

@@ -1,7 +1,10 @@
-import unittest, math
+import unittest, itertools, math
+from typing import Any
 from tinygrad import Tensor, Device, dtypes
-from tinygrad.
+from tinygrad.dtype import DType
+from tinygrad.ops import Ops, UOp
 from tinygrad.helpers import CI
+from tinygrad.codegen.devectorizer import full_graph_rewrite
 import numpy as np
 from tinygrad.device import is_dtype_supported

@@ -94,18 +97,50 @@ class TestBinaryOpsConstFolding(unittest.TestCase):
     _check_ast_count(0, Tensor([1.0, 2, 3, 4]) ** Tensor.ones(4))
   def test_literal_one_pow(self):
     _check_ast_count(0, 1 ** Tensor([1.0, 2, 3, 4]))
-  # TODO: pow simplification
   def test_tensor_one_pow(self):
-    _check_ast_count(
+    _check_ast_count(0, Tensor.ones(4) ** Tensor([1.0, 2, 3, 4]))
+
+class TestBitcastConstFolding(unittest.TestCase):
+  def test_scalar_bitcast(self):
+    def t(cases: dict[DType, Any]):
+      for (from_dt, from_v), (to_dt, to_v) in itertools.product(cases.items(), cases.items()):
+        if not math.isnan(from_v):
+          r = full_graph_rewrite(UOp.const(from_dt, from_v).bitcast(to_dt).sink()).src[0]
+          self.assertEqual(r.op, Ops.CONST, msg:=f"{from_dt} -> {to_dt} ({from_v} -> {to_v})")
+          self.assertEqual(r.dtype, to_dt, msg)
+          np.testing.assert_equal(r.arg, to_v, msg)
+
+    t({dtypes.int8: 0, dtypes.uint8: 0, dtypes.bool: False})
+    t({dtypes.int8: 1, dtypes.uint8: 1, dtypes.bool: True})
+
+    t({dtypes.int8: -1, dtypes.uint8: 2**8-1})
+    t({dtypes.int16: -1, dtypes.uint16: 2**16-1, dtypes.float16: float('nan')})
+    t({dtypes.int32: -1, dtypes.uint32: 2**32-1, dtypes.float32: float('nan')})
+    t({dtypes.int64: -1, dtypes.uint64: 2**64-1, dtypes.float64: float('nan')})
+
+    t({dtypes.int8: -2**7, dtypes.uint8: 2**7})
+    t({dtypes.int16: -2**15, dtypes.uint16: 2**15})
+    t({dtypes.int32: -2**31, dtypes.uint32: 2**31})
+    t({dtypes.int64: -2**63, dtypes.uint64: 2**63})
+
+    t({dtypes.int16: 13496, dtypes.uint16: 13496, dtypes.float16: 0.294921875})
+    t({dtypes.int32: 1050081145, dtypes.uint32: 1050081145, dtypes.float32: 0.29485681653022766})
+    t({dtypes.int64: 4598983288165178391, dtypes.uint64: 4598983288165178391, dtypes.float64: 0.29485681936461233})
+
+  def test_vec_bitcast(self):
+    r = full_graph_rewrite(UOp.const(dtypes.int32.vec(3), (-1, -2**31, 75)).bitcast(dtypes.uint32.vec(3)).sink()).src[0]
+    self.assertEqual(r.op, Ops.VECTORIZE)
+    self.assertEqual(r.dtype, dtypes.uint32.vec(3))
+    self.assertEqual(tuple(x.arg for x in r.src), (2**32-1, 2**31, 75))

 # folds advance indexing into basic indexing
 class TestIndexingConstFolding(unittest.TestCase):
   def test_scalar_index(self):
     t = Tensor.arange(16).float().reshape(1,1,4,4).realize()
-
-
-
-    _check_ast_count(
+    # TODO: fold these
+    _check_ast_count(2, t[:,:,Tensor(1),:])
+    _check_ast_count(2, t[:,:,Tensor(1)+2,:])
+    _check_ast_count(2, t[:,:,Tensor(1),Tensor(0)])

   @unittest.expectedFailure
   def test_const_tensor_index(self):
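The TestBitcastConstFolding cases above check that a constant bitcast folds to the value obtained by reinterpreting the same bytes under the target dtype. As an illustrative sketch only (plain Python struct, not tinygrad's rewrite rules; the helper name and format codes here are my own choices), the expected values in the tables can be reproduced like this:

```python
import struct

def bitcast_scalar(value, from_fmt: str, to_fmt: str):
  # reinterpret the raw bytes of a scalar under another struct format code
  return struct.unpack(to_fmt, struct.pack(from_fmt, value))[0]

print(bitcast_scalar(-1, 'i', 'I'))      # 4294967295 == 2**32-1  (int32 -1 -> uint32)
print(bitcast_scalar(-2**31, 'i', 'I'))  # 2147483648 == 2**31    (int32 min -> uint32)
print(bitcast_scalar(13496, 'h', 'e'))   # 0.294921875            (int16 -> float16)
```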
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_conv_shapetracker.py

@@ -26,9 +26,8 @@ class TestConvShapetracker(unittest.TestCase):
     print(si)
     ldb = [x for x in si.ast.toposort if x.op is Ops.LOAD][0]
     st: ShapeTracker = ldb.st_arg.simplify()
-    # NOTE: st.real_size() is broken
     print(si.inputs[0].size)
-
+    self.assertEqual(si.inputs[0].size, st.real_size())
     for v in st.views: print(v)

     # same st
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_copy_speed.py

@@ -24,7 +24,7 @@ class TestCopySpeed(unittest.TestCase):
     s.unlink()

   def testCopyCPUtoDefault(self):
-    t = Tensor.
+    t = Tensor.ones(N, N, device="CPU").contiguous().realize()
     print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
     for _ in range(3):
       with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):

@@ -35,7 +35,7 @@ class TestCopySpeed(unittest.TestCase):
   def testCopyCPUtoDefaultFresh(self):
     print("fresh copy")
     for _ in range(3):
-      t = Tensor.
+      t = Tensor.ones(N, N, device="CPU").contiguous().realize()
       with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"): # noqa: F821
         with Timing("queue: "):
           t.to(Device.DEFAULT).realize()

@@ -43,18 +43,18 @@ class TestCopySpeed(unittest.TestCase):
       del t

   def testCopyDefaulttoCPU(self):
-    t = Tensor.
+    t = Tensor.ones(N, N).contiguous().realize()
     print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
     for _ in range(3):
       with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
-        t.to('
+        t.to('CPU').realize()

   @unittest.skipIf(CI, "CI doesn't have 6 GPUs")
   @unittest.skipIf(Device.DEFAULT != "GPU", "only test this on GPU")
   def testCopyCPUto6GPUs(self):
     from tinygrad.runtime.ops_gpu import CLDevice
     if len(CLDevice.device_ids) != 6: raise unittest.SkipTest("computer doesn't have 6 GPUs")
-    t = Tensor.
+    t = Tensor.ones(N, N, device="CPU").contiguous().realize()
     print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
     for _ in range(3):
       with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s ({t.nbytes()*6/ns:.2f} GB/s total)"):
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_device_speed.py

@@ -6,7 +6,7 @@ class TestDeviceSpeed(unittest.TestCase):
   @classmethod
   def setUpClass(cls):
     cls.dev = Device[Device.DEFAULT]
-    cls.empty = Device[Device.DEFAULT].renderer.render(
+    cls.empty = Device[Device.DEFAULT].renderer.render([])

   def test_empty_compile(self):
     with Timing("compiler "):
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_dtype.py

@@ -1,10 +1,10 @@
-import unittest, operator, subprocess, math
+import unittest, operator, subprocess, struct, math
 import numpy as np
 import torch
 from typing import Any, List
 from tinygrad.device import is_dtype_supported
 from tinygrad.helpers import getenv, DEBUG, CI
-from tinygrad.dtype import DType, DTYPES_DICT, ImageDType, PtrDType, least_upper_float, least_upper_dtype, truncate_fp16, to_dtype
+from tinygrad.dtype import DType, DTYPES_DICT, ImageDType, PtrDType, least_upper_float, least_upper_dtype, truncate_fp16, truncate_bf16, to_dtype
 from tinygrad import Device, Tensor, dtypes
 from tinygrad.tensor import _to_np_dtype
 from hypothesis import assume, given, settings, strategies as strat

@@ -327,6 +327,11 @@ class TestPtrDType(unittest.TestCase):
     dt = dtypes.float.vec(4).ptr().vec(4)
     self.assertEqual(dt, eval(str(dt)))

+  def test_vec_ptr_sz(self):
+    dt = dtypes.float.ptr(1024).vec(4)
+    self.assertEqual(dt, eval(str(dt)))
+    self.assertEqual(str(dt), "dtypes.float.ptr(1024).vec(4)")
+
   def test_vcount(self):
     dt = dtypes.float.ptr().vec(4)
     self.assertEqual(dt.vcount, 4)

@@ -434,6 +439,14 @@ class TestHelpers(unittest.TestCase):
     self.assertEqual(truncate_fp16(65519.999), 65504)
     self.assertEqual(truncate_fp16(65520), math.inf)

+  def test_truncate_bf16(self):
+    self.assertEqual(truncate_bf16(1), 1)
+    self.assertAlmostEqual(truncate_bf16(1.1), 1.09375, places=7)
+    max_bf16 = struct.unpack('f', struct.pack('I', 0x7f7f0000))[0]
+    self.assertEqual(truncate_bf16(max_bf16), max_bf16)
+    self.assertEqual(truncate_bf16(min_bf16:=-max_bf16), min_bf16)
+    self.assertEqual(truncate_bf16(max_bf16 * 1.001), math.inf)
+
 class TestTypeSpec(unittest.TestCase):
   def setUp(self):
     self.old_default_int, self.old_default_float = dtypes.default_int, dtypes.default_float

@@ -796,7 +809,8 @@ class TestAutoCastType(unittest.TestCase):
     t.reshape(2, 1).expand(2, 10001).max().backward()
     np.testing.assert_allclose(t.grad.numpy(), [1, 0])

-  @unittest.skipIf(Device.DEFAULT=="PYTHON", "very slow")
+  @unittest.skipIf(Device.DEFAULT == "PYTHON", "very slow")
+  @unittest.skipIf(Device.DEFAULT == "WEBGPU", "Binding size is larger than the maximum storage buffer binding size")
   @unittest.skipUnless(is_dtype_supported(dtypes.half), "need half")
   def test_mean_half_precision_underflow(self):
     N = 10000

@@ -812,6 +826,7 @@ class TestAutoCastType(unittest.TestCase):
     t.square().mean().backward()
     np.testing.assert_allclose(t.grad.numpy().flatten(), [60000 * 2 / (N*N)] * N*N)

+  @unittest.skipIf(Device.DEFAULT == "WEBGPU", "Precision error")
   @unittest.skipUnless(is_dtype_supported(dtypes.half), "need half")
   def test_softmax_dtype(self):
     data = [1, 2, 3]
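For context on the new test_truncate_bf16 assertions above: a bfloat16 value carries only the top 16 bits of the float32 bit pattern, which is why 1.1 collapses to 1.09375. Below is a minimal sketch of that truncation, assuming plain bit masking; it is my illustration, not tinygrad's truncate_bf16, which per the test also overflows values beyond the bf16 maximum to math.inf.

```python
import struct

def bf16_truncate(x: float) -> float:
  # zero the low 16 bits of the float32 representation; no overflow-to-inf handling
  bits = struct.unpack('I', struct.pack('f', x))[0]
  return struct.unpack('f', struct.pack('I', bits & 0xFFFF0000))[0]

print(bf16_truncate(1.1))  # 1.09375, matching assertAlmostEqual(truncate_bf16(1.1), 1.09375)
print(bf16_truncate(1.0))  # 1.0 is exactly representable, unchanged
```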
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_dtype_alu.py

@@ -88,9 +88,8 @@ def universal_test_cast(a, in_dtype, dtype):
   numpy_value = np.array([a], dtype=_to_np_dtype(in_dtype)).astype(_to_np_dtype(dtype))
   np.testing.assert_equal(tensor_value.numpy(), numpy_value)

+@unittest.skipIf(Device.DEFAULT == "WEBGPU", "Inf and nan cases are wrong on WebGPU")
 def universal_test_midcast(a, b, c, op1, op2, d1:DType, d2:DType):
-  # the 'inf' and 'nan' cases are wrong on WEBGPU
-  if (any(map(math.isnan, [a, b, c])) or math.isinf(c)) and Device.DEFAULT == "WEBGPU": return
   if not isinstance(op1, tuple): op1 = (op1, op1)
   if not isinstance(op2, tuple): op2 = (op2, op2)
   at, bt, ct = Tensor([a], dtype=d1), Tensor([b], dtype=d1), Tensor([c], dtype=d2)
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_fuzz_shape_ops.py

@@ -38,7 +38,7 @@ def apply(tor, ten, tor_fn, ten_fn=None):
   except: ten, ok = None, not ok # noqa: E722
   return tor, ten, ok

-@unittest.skipIf(CI and Device.DEFAULT
+@unittest.skipIf(CI and Device.DEFAULT in ("CPU", "NV"), "slow")
 class TestShapeOps(unittest.TestCase):
   @settings.get_profile(__file__)
   @given(st_shape(), st_int32, st.one_of(st_int32, st.lists(st_int32)))
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_jit.py

@@ -22,7 +22,7 @@ def _simple_test(add, extract=lambda x: x, N=10):
 class TestJit(unittest.TestCase):

   @settings(deadline=2e4)
-  @unittest.skipUnless(Device.DEFAULT in ["LLVM", "
+  @unittest.skipUnless(Device.DEFAULT in ["LLVM", "CPU"], f"no support on {Device.DEFAULT}")
   @given(strat.sampled_from([Tensor.exp2, Tensor.log2, Tensor.sin]))
   def test_approx_jit_timeout(self, op):
     with Context(TRANSCENDENTAL=2):

@@ -497,8 +497,8 @@ class TestCopyInsideJit(unittest.TestCase):
     @TinyJit
     def add(x,y) -> Tensor: return x.to(Device.DEFAULT)+y
     for _ in range(5):
-      # create a Tensor
-      a = Tensor.rand(16,16,device="
+      # create a Tensor on CPU
+      a = Tensor.rand(16,16,device="CPU").realize()
       b = Tensor.rand(16,16).realize()
       out = add(a,b)
       np.testing.assert_allclose(out.flatten().tolist(), [x+y for x,y in zip(a.flatten().tolist(), b.flatten().tolist())])

@@ -529,12 +529,12 @@ class TestJitPrune(unittest.TestCase):
     w2_prune = TinyJit(w2, prune=True)

     for _ in range(3):
-      a = Tensor.rand(16, device="
+      a = Tensor.rand(16, device="CPU").realize()
       out = w2_noprune(a)
       np.testing.assert_allclose(out.tolist(), [x*2+y for x,y in zip(weights.tolist(), a.tolist())])

     for _ in range(3):
-      a = Tensor.rand(16, device="
+      a = Tensor.rand(16, device="CPU").realize()
       out = w2_prune(a)
       np.testing.assert_allclose(out.tolist(), [x*2+y for x,y in zip(weights.tolist(), a.tolist())])
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_kernel_cache.py

@@ -5,7 +5,7 @@ from tinygrad import Device

 class TestKernelCache(unittest.TestCase):
   def test_kernel_cache_in_action(self):
-    if Device.DEFAULT not in ["
+    if Device.DEFAULT not in ["CPU"]:
       self.skipTest("No custom kernel cache is implemented")

     unique_const = 0.6765677269

@@ -16,14 +16,14 @@ class TestKernelCache(unittest.TestCase):

     a1 = Tensor.rand(4,4).realize()
     b1 = Tensor.rand(4,4).realize()
-    orig_compile_func = Device['
-    Device['
+    orig_compile_func = Device['CPU'].compiler
+    Device['CPU'].compiler = None # making it not callable

     try:
       x1 = a1 + b1 + unique_const
       x1.realize() # Same kernel should be from cache.
     finally:
-      Device['
+      Device['CPU'].compiler = orig_compile_func

 if __name__ == "__main__":
   unittest.main()