tinygrad 0.10.1.tar.gz → 0.10.2.tar.gz

This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only and reflects only the changes between those published versions.
Files changed (167)
  1. {tinygrad-0.10.1 → tinygrad-0.10.2}/PKG-INFO +20 -8
  2. {tinygrad-0.10.1 → tinygrad-0.10.2}/README.md +2 -2
  3. {tinygrad-0.10.1 → tinygrad-0.10.2}/setup.py +19 -12
  4. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_arange.py +6 -9
  5. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_assign.py +1 -0
  6. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_const_folding.py +43 -8
  7. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_conv_shapetracker.py +1 -2
  8. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_copy_speed.py +5 -5
  9. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_device_speed.py +1 -1
  10. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_dtype.py +18 -3
  11. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_dtype_alu.py +1 -2
  12. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_fuzz_shape_ops.py +1 -1
  13. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_jit.py +5 -5
  14. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_kernel_cache.py +4 -4
  15. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_linearizer.py +77 -47
  16. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_linearizer_failures.py +6 -6
  17. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_linearizer_overflows.py +2 -2
  18. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_multitensor.py +5 -1
  19. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_nn.py +8 -2
  20. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_ops.py +118 -36
  21. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_pickle.py +4 -4
  22. tinygrad-0.10.2/test/test_quantize_onnx.py +212 -0
  23. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_randomness.py +2 -2
  24. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_renderer_failures.py +6 -5
  25. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_sample.py +2 -1
  26. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_schedule.py +125 -29
  27. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_search.py +32 -1
  28. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_speed_v_torch.py +10 -2
  29. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_subbuffer.py +1 -2
  30. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_tensor.py +11 -5
  31. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_tensor_uop.py +4 -4
  32. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_tiny.py +2 -2
  33. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_transcendental.py +15 -4
  34. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_uop_graph.py +64 -5
  35. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_uops.py +19 -33
  36. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_uops_stats.py +3 -3
  37. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_zero_copy.py +1 -1
  38. tinygrad-0.10.2/tinygrad/codegen/devectorizer.py +247 -0
  39. tinygrad-0.10.2/tinygrad/codegen/expander.py +121 -0
  40. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/codegen/kernel.py +35 -37
  41. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/codegen/linearize.py +19 -10
  42. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/codegen/lowerer.py +31 -8
  43. tinygrad-0.10.2/tinygrad/codegen/symbolic.py +476 -0
  44. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/codegen/transcendental.py +10 -0
  45. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/device.py +28 -11
  46. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/dtype.py +12 -3
  47. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/jit.py +3 -2
  48. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/multi.py +0 -1
  49. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/realize.py +7 -4
  50. tinygrad-0.10.2/tinygrad/engine/schedule.py +458 -0
  51. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/search.py +20 -27
  52. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/gradient.py +3 -0
  53. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/helpers.py +7 -4
  54. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/nn/state.py +2 -2
  55. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/ops.py +64 -329
  56. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/renderer/__init__.py +19 -3
  57. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/renderer/cstyle.py +39 -18
  58. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/renderer/llvmir.py +55 -18
  59. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/renderer/ptx.py +6 -2
  60. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/renderer/wgsl.py +20 -12
  61. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/libc.py +404 -71
  62. tinygrad-0.10.1/tinygrad/runtime/autogen/libpciaccess.py → tinygrad-0.10.2/tinygrad/runtime/autogen/pci.py +25 -715
  63. tinygrad-0.10.2/tinygrad/runtime/autogen/webgpu.py +6985 -0
  64. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/graph/metal.py +28 -29
  65. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_amd.py +37 -34
  66. tinygrad-0.10.1/tinygrad/runtime/ops_clang.py → tinygrad-0.10.2/tinygrad/runtime/ops_cpu.py +4 -2
  67. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_disk.py +1 -1
  68. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_dsp.py +59 -33
  69. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_llvm.py +14 -12
  70. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_metal.py +78 -62
  71. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_nv.py +9 -6
  72. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_python.py +5 -5
  73. tinygrad-0.10.2/tinygrad/runtime/ops_webgpu.py +225 -0
  74. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/am/amdev.py +23 -11
  75. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/am/ip.py +10 -10
  76. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/elf.py +2 -0
  77. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/hcq.py +7 -5
  78. tinygrad-0.10.2/tinygrad/runtime/support/llvm.py +26 -0
  79. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/shape/shapetracker.py +3 -2
  80. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/shape/view.py +2 -3
  81. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/spec.py +21 -20
  82. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/tensor.py +150 -90
  83. tinygrad-0.10.2/tinygrad/viz/assets/cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/highlight.min.js +1232 -0
  84. tinygrad-0.10.2/tinygrad/viz/assets/cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/languages/cpp.min.js +47 -0
  85. tinygrad-0.10.2/tinygrad/viz/assets/cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/languages/python.min.js +42 -0
  86. tinygrad-0.10.2/tinygrad/viz/assets/cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/styles/default.min.css +9 -0
  87. tinygrad-0.10.2/tinygrad/viz/assets/d3js.org/d3.v5.min.js +2 -0
  88. tinygrad-0.10.2/tinygrad/viz/assets/dagrejs.github.io/project/dagre-d3/latest/dagre-d3.min.js +4816 -0
  89. tinygrad-0.10.2/tinygrad/viz/assets/unpkg.com/@highlightjs/cdn-assets@11.10.0/styles/tokyo-night-dark.min.css +8 -0
  90. tinygrad-0.10.2/tinygrad/viz/index.html +544 -0
  91. tinygrad-0.10.2/tinygrad/viz/perfetto.html +178 -0
  92. tinygrad-0.10.2/tinygrad/viz/serve.py +205 -0
  93. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad.egg-info/PKG-INFO +20 -8
  94. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad.egg-info/SOURCES.txt +18 -4
  95. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad.egg-info/requires.txt +19 -6
  96. tinygrad-0.10.1/tinygrad/codegen/rewriter.py +0 -516
  97. tinygrad-0.10.1/tinygrad/engine/schedule.py +0 -486
  98. tinygrad-0.10.1/tinygrad/runtime/ops_webgpu.py +0 -63
  99. tinygrad-0.10.1/tinygrad/runtime/support/llvm.py +0 -32
  100. {tinygrad-0.10.1 → tinygrad-0.10.2}/LICENSE +0 -0
  101. {tinygrad-0.10.1 → tinygrad-0.10.2}/setup.cfg +0 -0
  102. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_compile_failures.py +0 -0
  103. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_conv.py +0 -0
  104. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_fusion_op.py +0 -0
  105. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_gc.py +0 -0
  106. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_graph.py +0 -0
  107. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_hcq.py +0 -0
  108. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_image_dtype.py +0 -0
  109. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_linearizer_dumb.py +0 -0
  110. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_masked_st.py +0 -0
  111. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_metal.py +0 -0
  112. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_method_cache.py +0 -0
  113. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_net_speed.py +0 -0
  114. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_ocl.py +0 -0
  115. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_optim.py +0 -0
  116. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_profiler.py +0 -0
  117. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_rearrange_einops.py +0 -0
  118. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_setitem.py +0 -0
  119. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_specific_conv.py +0 -0
  120. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_symbolic_jit.py +0 -0
  121. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_symbolic_ops.py +0 -0
  122. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_symbolic_shapetracker.py +0 -0
  123. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_tensor_data.py +0 -0
  124. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_tensor_variable.py +0 -0
  125. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_to_numpy.py +0 -0
  126. {tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_winograd.py +0 -0
  127. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/__init__.py +0 -0
  128. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/codegen/__init__.py +0 -0
  129. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/__init__.py +0 -0
  130. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/engine/memory.py +0 -0
  131. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/nn/__init__.py +0 -0
  132. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/nn/datasets.py +0 -0
  133. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/nn/optim.py +0 -0
  134. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/py.typed +0 -0
  135. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/__init__.py +0 -0
  136. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/adreno.py +0 -0
  137. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/amd_gpu.py +0 -0
  138. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/comgr.py +0 -0
  139. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/cuda.py +0 -0
  140. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/hip.py +0 -0
  141. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/hsa.py +0 -0
  142. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/io_uring.py +0 -0
  143. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/kfd.py +0 -0
  144. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/kgsl.py +0 -0
  145. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/llvm.py +0 -0
  146. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/nv_gpu.py +0 -0
  147. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/nvrtc.py +0 -0
  148. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/opencl.py +0 -0
  149. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/qcom_dsp.py +0 -0
  150. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/autogen/vfio.py +0 -0
  151. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/graph/__init__.py +0 -0
  152. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/graph/cuda.py +0 -0
  153. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/graph/hcq.py +0 -0
  154. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_cloud.py +0 -0
  155. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_cuda.py +0 -0
  156. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_gpu.py +0 -0
  157. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_hip.py +0 -0
  158. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_npy.py +0 -0
  159. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/ops_qcom.py +0 -0
  160. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/__init__.py +0 -0
  161. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/allocator.py +0 -0
  162. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/am/__init__.py +0 -0
  163. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/compiler_cuda.py +0 -0
  164. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/runtime/support/compiler_hip.py +0 -0
  165. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad/shape/__init__.py +0 -0
  166. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad.egg-info/dependency_links.txt +0 -0
  167. {tinygrad-0.10.1 → tinygrad-0.10.2}/tinygrad.egg-info/top_level.txt +0 -0

{tinygrad-0.10.1 → tinygrad-0.10.2}/PKG-INFO +20 -8
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: tinygrad
- Version: 0.10.1
+ Version: 0.10.2
  Summary: You like pytorch? You like micrograd? You love tinygrad! <3
  Author: George Hotz
  License: MIT
@@ -20,13 +20,28 @@ Requires-Dist: typing-extensions; extra == "linting"
  Requires-Dist: pre-commit; extra == "linting"
  Requires-Dist: ruff; extra == "linting"
  Requires-Dist: types-tqdm; extra == "linting"
+ Provides-Extra: testing-minimal
+ Requires-Dist: numpy; extra == "testing-minimal"
+ Requires-Dist: torch; extra == "testing-minimal"
+ Requires-Dist: pytest; extra == "testing-minimal"
+ Requires-Dist: pytest-xdist; extra == "testing-minimal"
+ Requires-Dist: hypothesis; extra == "testing-minimal"
+ Provides-Extra: testing-unit
+ Requires-Dist: numpy; extra == "testing-unit"
+ Requires-Dist: torch; extra == "testing-unit"
+ Requires-Dist: pytest; extra == "testing-unit"
+ Requires-Dist: pytest-xdist; extra == "testing-unit"
+ Requires-Dist: hypothesis; extra == "testing-unit"
+ Requires-Dist: tqdm; extra == "testing-unit"
+ Requires-Dist: safetensors; extra == "testing-unit"
+ Requires-Dist: tabulate; extra == "testing-unit"
  Provides-Extra: testing
  Requires-Dist: numpy; extra == "testing"
  Requires-Dist: torch; extra == "testing"
- Requires-Dist: jax; extra == "testing"
- Requires-Dist: pillow; extra == "testing"
  Requires-Dist: pytest; extra == "testing"
  Requires-Dist: pytest-xdist; extra == "testing"
+ Requires-Dist: hypothesis; extra == "testing"
+ Requires-Dist: pillow; extra == "testing"
  Requires-Dist: onnx==1.16.0; extra == "testing"
  Requires-Dist: onnx2torch; extra == "testing"
  Requires-Dist: opencv-python; extra == "testing"
@@ -39,13 +54,10 @@ Requires-Dist: tiktoken; extra == "testing"
  Requires-Dist: blobfile; extra == "testing"
  Requires-Dist: librosa; extra == "testing"
  Requires-Dist: networkx; extra == "testing"
- Requires-Dist: hypothesis; extra == "testing"
  Requires-Dist: nibabel; extra == "testing"
  Requires-Dist: bottle; extra == "testing"
  Requires-Dist: ggml-python; extra == "testing"
  Requires-Dist: capstone; extra == "testing"
- Provides-Extra: webgpu
- Requires-Dist: wgpu; extra == "webgpu"
  Provides-Extra: docs
  Requires-Dist: mkdocs; extra == "docs"
  Requires-Dist: mkdocs-material; extra == "docs"
@@ -149,7 +161,7 @@ See [examples/beautiful_mnist.py](examples/beautiful_mnist.py) for the full vers
  tinygrad already supports numerous accelerators, including:

  - [x] [GPU (OpenCL)](tinygrad/runtime/ops_gpu.py)
- - [x] [CLANG (C Code)](tinygrad/runtime/ops_clang.py)
+ - [x] [CPU (C Code)](tinygrad/runtime/ops_cpu.py)
  - [x] [LLVM](tinygrad/runtime/ops_llvm.py)
  - [x] [METAL](tinygrad/runtime/ops_metal.py)
  - [x] [CUDA](tinygrad/runtime/ops_cuda.py)
@@ -219,7 +231,7 @@ We'll start with what will get your PR closed with a pointer to this section:

  - No code golf! While low line count is a guiding light of this project, anything that remotely looks like code golf will be closed. The true goal is reducing complexity and increasing readability, and deleting `\n`s does nothing to help with that.
  - All docs and whitespace changes will be closed unless you are a well-known contributor. The people writing the docs should be those who know the codebase the absolute best. People who have not demonstrated that shouldn't be messing with docs. Whitespace changes are both useless *and* carry a risk of introducing bugs.
- - Anything you claim is a "speedup" must be benchmarked. In general, the goal is simplicity, so even if your PR makes things marginally faster, you have to consider the tradeoff with maintainablity and readablity.
+ - Anything you claim is a "speedup" must be benchmarked. In general, the goal is simplicity, so even if your PR makes things marginally faster, you have to consider the tradeoff with maintainability and readability.
  - In general, the code outside the core `tinygrad/` folder is not well tested, so unless the current code there is broken, you shouldn't be changing it.
  - If your PR looks "complex", is a big diff, or adds lots of lines, it won't be reviewed or merged. Consider breaking it up into smaller PRs that are individually clear wins. A common pattern I see is prerequisite refactors before adding new functionality. If you can (cleanly) refactor to the point that the feature is a 3 line change, this is great, and something easy for us to review.

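The two extras added above, `testing-minimal` and `testing-unit`, split off part of the old monolithic `testing` extra (which also drops `jax` and now pulls in `hypothesis` directly). A hedged sketch of verifying the new metadata after installing 0.10.2, e.g. via `pip install "tinygrad[testing_minimal]"`; the extras names come from the Provides-Extra lines above, everything else is standard library:

```python
# Check that the installed tinygrad 0.10.2 metadata advertises the new optional-dependency groups.
from importlib.metadata import metadata

extras = metadata("tinygrad").get_all("Provides-Extra") or []
print("testing-minimal" in extras, "testing-unit" in extras)  # expected: True True
```
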
{tinygrad-0.10.1 → tinygrad-0.10.2}/README.md +2 -2
@@ -81,7 +81,7 @@ See [examples/beautiful_mnist.py](examples/beautiful_mnist.py) for the full vers
  tinygrad already supports numerous accelerators, including:

  - [x] [GPU (OpenCL)](tinygrad/runtime/ops_gpu.py)
- - [x] [CLANG (C Code)](tinygrad/runtime/ops_clang.py)
+ - [x] [CPU (C Code)](tinygrad/runtime/ops_cpu.py)
  - [x] [LLVM](tinygrad/runtime/ops_llvm.py)
  - [x] [METAL](tinygrad/runtime/ops_metal.py)
  - [x] [CUDA](tinygrad/runtime/ops_cuda.py)
@@ -151,7 +151,7 @@ We'll start with what will get your PR closed with a pointer to this section:

  - No code golf! While low line count is a guiding light of this project, anything that remotely looks like code golf will be closed. The true goal is reducing complexity and increasing readability, and deleting `\n`s does nothing to help with that.
  - All docs and whitespace changes will be closed unless you are a well-known contributor. The people writing the docs should be those who know the codebase the absolute best. People who have not demonstrated that shouldn't be messing with docs. Whitespace changes are both useless *and* carry a risk of introducing bugs.
- - Anything you claim is a "speedup" must be benchmarked. In general, the goal is simplicity, so even if your PR makes things marginally faster, you have to consider the tradeoff with maintainablity and readablity.
+ - Anything you claim is a "speedup" must be benchmarked. In general, the goal is simplicity, so even if your PR makes things marginally faster, you have to consider the tradeoff with maintainability and readability.
  - In general, the code outside the core `tinygrad/` folder is not well tested, so unless the current code there is broken, you shouldn't be changing it.
  - If your PR looks "complex", is a big diff, or adds lots of lines, it won't be reviewed or merged. Consider breaking it up into smaller PRs that are individually clear wins. A common pattern I see is prerequisite refactors before adding new functionality. If you can (cleanly) refactor to the point that the feature is a 3 line change, this is great, and something easy for us to review.

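Both README hunks track the backend rename from CLANG to CPU (the C-code runtime moved from ops_clang.py to ops_cpu.py, file 66 in the list above). A minimal sketch of what the rename means for user code, assuming only the device string changed; every call below also appears in the updated tests later in this diff:

```python
# The C-code backend is now addressed as "CPU" (it was "CLANG"/"clang" in 0.10.1).
from tinygrad import Tensor, Device

print(Device.DEFAULT)                      # the active default backend
t = Tensor([1.0, 2.0, 3.0], device="CPU")  # formerly device="CLANG"
print((t + 1).tolist())                    # [2.0, 3.0, 4.0]
```
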
{tinygrad-0.10.1 → tinygrad-0.10.2}/setup.py +19 -12
@@ -7,16 +7,24 @@ directory = Path(__file__).resolve().parent
  with open(directory / 'README.md', encoding='utf-8') as f:
  long_description = f.read()

+ testing_minimal = [
+ "numpy",
+ "torch",
+ "pytest",
+ "pytest-xdist",
+ "hypothesis",
+ ]
+
  setup(name='tinygrad',
- version='0.10.1',
+ version='0.10.2',
  description='You like pytorch? You like micrograd? You love tinygrad! <3',
  author='George Hotz',
  license='MIT',
  long_description=long_description,
  long_description_content_type='text/markdown',
- packages = ['tinygrad', 'tinygrad.runtime.autogen', 'tinygrad.codegen', 'tinygrad.nn', 'tinygrad.renderer', 'tinygrad.engine',
+ packages = ['tinygrad', 'tinygrad.runtime.autogen', 'tinygrad.codegen', 'tinygrad.nn', 'tinygrad.renderer', 'tinygrad.engine', 'tinygrad.viz',
  'tinygrad.runtime', 'tinygrad.runtime.support', 'tinygrad.runtime.support.am', 'tinygrad.runtime.graph', 'tinygrad.shape'],
- package_data = {'tinygrad': ['py.typed']},
+ package_data = {'tinygrad': ['py.typed'], 'tinygrad.viz': ['index.html', 'perfetto.html', 'assets/**/*']},
  classifiers=[
  "Programming Language :: Python :: 3",
  "License :: OSI Approved :: MIT License"
@@ -35,13 +43,14 @@ setup(name='tinygrad',
  "types-tqdm",
  ],
  #'mlperf': ["mlperf-logging @ git+https://github.com/mlperf/logging.git@4.1.0-rc3"],
- 'testing': [
- "numpy",
- "torch",
- "jax",
+ 'testing_minimal': testing_minimal,
+ 'testing_unit': testing_minimal + [
+ "tqdm",
+ "safetensors",
+ "tabulate" # for sz.py
+ ],
+ 'testing': testing_minimal + [
  "pillow",
- "pytest",
- "pytest-xdist",
  "onnx==1.16.0",
  "onnx2torch",
  "opencv-python",
@@ -54,13 +63,11 @@ setup(name='tinygrad',
  "blobfile",
  "librosa",
  "networkx",
- "hypothesis",
  "nibabel",
  "bottle",
  "ggml-python",
  "capstone"
  ],
- 'webgpu': ["wgpu"],
  'docs': [
  "mkdocs",
  "mkdocs-material",
@@ -73,6 +80,6 @@ setup(name='tinygrad',
  'testing_tf': [
  "tensorflow==2.15.1",
  "tensorflow_addons",
- ]
+ ],
  },
  include_package_data=True)

{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_arange.py +6 -9
@@ -66,20 +66,17 @@ class TestArange(unittest.TestCase):
  return self.test_all_opts([Opt(OptOps.UPCAST, 0, 4), Opt(OptOps.UNROLL, 0, 4)], [Opt(op=OptOps.GROUP, axis=0, arg=0)])

  class TestIndexing(unittest.TestCase):
- # update: passing after CAST_BEFORE_VIEW=1 deletion
- # @unittest.expectedFailure
  def test_arange_2_reduce(self):
  needle = Tensor.zeros(16384, dtype=dtypes.int).contiguous()
  needle[1337] = 1
  needle.realize()
  with Context(NOOPT=1, FUSE_ARANGE=1):
  GlobalCounters.reset()
- # TODO: it should work without these reshapes
- out = ((Tensor.arange(1,16385).reshape(16384,1)-1)*needle.reshape(16384,1)).sum()
+ out = ((Tensor.arange(1,16385)-1)*needle).sum()
  sched = out.schedule()
- assert len(sched) == 1
+ self.assertEqual(len(sched), 1)
  run_schedule(sched)
- assert out.item() == 1337, f"expected 1337, got {out.item()}"
+ self.assertEqual(out.item(), 1337)

  @unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
  def test_manual_index(self):
@@ -95,7 +92,7 @@ class TestIndexing(unittest.TestCase):
  full = (rng==idxs).where(reshape_dataset, Tensor.zeros(4, 256, 16384, 1))
  X = full.sum(axis=(2,3))
  sched = X.schedule()
- assert len(sched) == 1
+ self.assertEqual(len(sched), 1)
  run_schedule(sched)
  assert GlobalCounters.global_ops < 4*16384, f"too many ops {GlobalCounters.global_ops}"
  np.testing.assert_allclose(real_index, X.numpy())
@@ -111,7 +108,7 @@ class TestIndexing(unittest.TestCase):
  assert X.shape == (4,256)
  sched = X.schedule()
  # TODO: enable these asserts when the scheduler can handle this
- #assert len(sched) == 1, f"{len(sched)} != 1"
+ #self.assertEqual(len(sched), 1)
  run_schedule(sched)
  #assert GlobalCounters.global_ops < 4*16384, f"too many ops {GlobalCounters.global_ops}"
  np.testing.assert_allclose(real_index, X.numpy())
@@ -126,7 +123,7 @@ class TestIndexing(unittest.TestCase):
  X = dataset[idxs]
  assert X.shape == (4,256)
  sched = X.schedule()
- assert len(sched) == 2
+ self.assertEqual(len(sched), 2)
  run_schedule(sched)
  assert GlobalCounters.global_ops < 4*16384, f"too many ops {GlobalCounters.global_ops} != {4*16384}"
  np.testing.assert_allclose(real_index, X.numpy())

{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_assign.py +1 -0
@@ -203,6 +203,7 @@ class TestAssign(unittest.TestCase):
  np.testing.assert_equal(b0.numpy(), 128)
  np.testing.assert_equal(b1.numpy(), 608)

+ @unittest.skip("TODO: bring this assert back")
  def test_crossunder_assign(self):
  # NOTE: should *not* raise AssertionError from numpy
  with self.assertRaisesRegex(RuntimeError, "cycle"):

{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_const_folding.py +43 -8
@@ -1,7 +1,10 @@
- import unittest, math
+ import unittest, itertools, math
+ from typing import Any
  from tinygrad import Tensor, Device, dtypes
- from tinygrad.ops import Ops
+ from tinygrad.dtype import DType
+ from tinygrad.ops import Ops, UOp
  from tinygrad.helpers import CI
+ from tinygrad.codegen.devectorizer import full_graph_rewrite
  import numpy as np
  from tinygrad.device import is_dtype_supported

@@ -94,18 +97,50 @@ class TestBinaryOpsConstFolding(unittest.TestCase):
  _check_ast_count(0, Tensor([1.0, 2, 3, 4]) ** Tensor.ones(4))
  def test_literal_one_pow(self):
  _check_ast_count(0, 1 ** Tensor([1.0, 2, 3, 4]))
- # TODO: pow simplification
  def test_tensor_one_pow(self):
- _check_ast_count(1, Tensor.ones(4) ** Tensor([1.0, 2, 3, 4]))
+ _check_ast_count(0, Tensor.ones(4) ** Tensor([1.0, 2, 3, 4]))
+
+ class TestBitcastConstFolding(unittest.TestCase):
+ def test_scalar_bitcast(self):
+ def t(cases: dict[DType, Any]):
+ for (from_dt, from_v), (to_dt, to_v) in itertools.product(cases.items(), cases.items()):
+ if not math.isnan(from_v):
+ r = full_graph_rewrite(UOp.const(from_dt, from_v).bitcast(to_dt).sink()).src[0]
+ self.assertEqual(r.op, Ops.CONST, msg:=f"{from_dt} -> {to_dt} ({from_v} -> {to_v})")
+ self.assertEqual(r.dtype, to_dt, msg)
+ np.testing.assert_equal(r.arg, to_v, msg)
+
+ t({dtypes.int8: 0, dtypes.uint8: 0, dtypes.bool: False})
+ t({dtypes.int8: 1, dtypes.uint8: 1, dtypes.bool: True})
+
+ t({dtypes.int8: -1, dtypes.uint8: 2**8-1})
+ t({dtypes.int16: -1, dtypes.uint16: 2**16-1, dtypes.float16: float('nan')})
+ t({dtypes.int32: -1, dtypes.uint32: 2**32-1, dtypes.float32: float('nan')})
+ t({dtypes.int64: -1, dtypes.uint64: 2**64-1, dtypes.float64: float('nan')})
+
+ t({dtypes.int8: -2**7, dtypes.uint8: 2**7})
+ t({dtypes.int16: -2**15, dtypes.uint16: 2**15})
+ t({dtypes.int32: -2**31, dtypes.uint32: 2**31})
+ t({dtypes.int64: -2**63, dtypes.uint64: 2**63})
+
+ t({dtypes.int16: 13496, dtypes.uint16: 13496, dtypes.float16: 0.294921875})
+ t({dtypes.int32: 1050081145, dtypes.uint32: 1050081145, dtypes.float32: 0.29485681653022766})
+ t({dtypes.int64: 4598983288165178391, dtypes.uint64: 4598983288165178391, dtypes.float64: 0.29485681936461233})
+
+ def test_vec_bitcast(self):
+ r = full_graph_rewrite(UOp.const(dtypes.int32.vec(3), (-1, -2**31, 75)).bitcast(dtypes.uint32.vec(3)).sink()).src[0]
+ self.assertEqual(r.op, Ops.VECTORIZE)
+ self.assertEqual(r.dtype, dtypes.uint32.vec(3))
+ self.assertEqual(tuple(x.arg for x in r.src), (2**32-1, 2**31, 75))

  # folds advance indexing into basic indexing
  class TestIndexingConstFolding(unittest.TestCase):
  def test_scalar_index(self):
  t = Tensor.arange(16).float().reshape(1,1,4,4).realize()
- _check_ast_count(0, t[:,:,Tensor(1),:])
- # NOTE: this is no longer supported because the 1+2 isn't folding early.
- #_check_ast_count(0, t[:,:,Tensor(1)+2,:])
- _check_ast_count(0, t[:,:,Tensor(1),Tensor(0)])
+ # TODO: fold these
+ _check_ast_count(2, t[:,:,Tensor(1),:])
+ _check_ast_count(2, t[:,:,Tensor(1)+2,:])
+ _check_ast_count(2, t[:,:,Tensor(1),Tensor(0)])

  @unittest.expectedFailure
  def test_const_tensor_index(self):
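The new `TestBitcastConstFolding` above drives the rewriter directly through `full_graph_rewrite`, now exported from the new `tinygrad/codegen/devectorizer.py` (file 38 in the list). A condensed sketch of the scalar case it exercises, using only APIs and values that appear in the hunk:

```python
# A CONST bitcast to another dtype should rewrite to a plain CONST of the target dtype.
from tinygrad import dtypes
from tinygrad.ops import Ops, UOp
from tinygrad.codegen.devectorizer import full_graph_rewrite

folded = full_graph_rewrite(UOp.const(dtypes.int8, -1).bitcast(dtypes.uint8).sink()).src[0]
assert folded.op is Ops.CONST and folded.dtype == dtypes.uint8
assert folded.arg == 2**8 - 1  # -1 reinterpreted as uint8
```
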
{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_conv_shapetracker.py +1 -2
@@ -26,9 +26,8 @@ class TestConvShapetracker(unittest.TestCase):
  print(si)
  ldb = [x for x in si.ast.toposort if x.op is Ops.LOAD][0]
  st: ShapeTracker = ldb.st_arg.simplify()
- # NOTE: st.real_size() is broken
  print(si.inputs[0].size)
- #self.assertEqual(si.inputs[0].size, st.real_size())
+ self.assertEqual(si.inputs[0].size, st.real_size())
  for v in st.views: print(v)

  # same st

{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_copy_speed.py +5 -5
@@ -24,7 +24,7 @@ class TestCopySpeed(unittest.TestCase):
  s.unlink()

  def testCopyCPUtoDefault(self):
- t = Tensor.rand(N, N, device="clang").realize()
+ t = Tensor.ones(N, N, device="CPU").contiguous().realize()
  print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
  for _ in range(3):
  with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
@@ -35,7 +35,7 @@ class TestCopySpeed(unittest.TestCase):
  def testCopyCPUtoDefaultFresh(self):
  print("fresh copy")
  for _ in range(3):
- t = Tensor.rand(N, N, device="clang").realize()
+ t = Tensor.ones(N, N, device="CPU").contiguous().realize()
  with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"): # noqa: F821
  with Timing("queue: "):
  t.to(Device.DEFAULT).realize()
@@ -43,18 +43,18 @@ class TestCopySpeed(unittest.TestCase):
  del t

  def testCopyDefaulttoCPU(self):
- t = Tensor.rand(N, N).realize()
+ t = Tensor.ones(N, N).contiguous().realize()
  print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
  for _ in range(3):
  with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
- t.to('clang').realize()
+ t.to('CPU').realize()

  @unittest.skipIf(CI, "CI doesn't have 6 GPUs")
  @unittest.skipIf(Device.DEFAULT != "GPU", "only test this on GPU")
  def testCopyCPUto6GPUs(self):
  from tinygrad.runtime.ops_gpu import CLDevice
  if len(CLDevice.device_ids) != 6: raise unittest.SkipTest("computer doesn't have 6 GPUs")
- t = Tensor.rand(N, N, device="clang").realize()
+ t = Tensor.ones(N, N, device="CPU").contiguous().realize()
  print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
  for _ in range(3):
  with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s ({t.nbytes()*6/ns:.2f} GB/s total)"):

{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_device_speed.py +1 -1
@@ -6,7 +6,7 @@ class TestDeviceSpeed(unittest.TestCase):
  @classmethod
  def setUpClass(cls):
  cls.dev = Device[Device.DEFAULT]
- cls.empty = Device[Device.DEFAULT].renderer.render("test", [])
+ cls.empty = Device[Device.DEFAULT].renderer.render([])

  def test_empty_compile(self):
  with Timing("compiler "):

{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_dtype.py +18 -3
@@ -1,10 +1,10 @@
- import unittest, operator, subprocess, math
+ import unittest, operator, subprocess, struct, math
  import numpy as np
  import torch
  from typing import Any, List
  from tinygrad.device import is_dtype_supported
  from tinygrad.helpers import getenv, DEBUG, CI
- from tinygrad.dtype import DType, DTYPES_DICT, ImageDType, PtrDType, least_upper_float, least_upper_dtype, truncate_fp16, to_dtype
+ from tinygrad.dtype import DType, DTYPES_DICT, ImageDType, PtrDType, least_upper_float, least_upper_dtype, truncate_fp16, truncate_bf16, to_dtype
  from tinygrad import Device, Tensor, dtypes
  from tinygrad.tensor import _to_np_dtype
  from hypothesis import assume, given, settings, strategies as strat
@@ -327,6 +327,11 @@ class TestPtrDType(unittest.TestCase):
  dt = dtypes.float.vec(4).ptr().vec(4)
  self.assertEqual(dt, eval(str(dt)))

+ def test_vec_ptr_sz(self):
+ dt = dtypes.float.ptr(1024).vec(4)
+ self.assertEqual(dt, eval(str(dt)))
+ self.assertEqual(str(dt), "dtypes.float.ptr(1024).vec(4)")
+
  def test_vcount(self):
  dt = dtypes.float.ptr().vec(4)
  self.assertEqual(dt.vcount, 4)
@@ -434,6 +439,14 @@ class TestHelpers(unittest.TestCase):
  self.assertEqual(truncate_fp16(65519.999), 65504)
  self.assertEqual(truncate_fp16(65520), math.inf)

+ def test_truncate_bf16(self):
+ self.assertEqual(truncate_bf16(1), 1)
+ self.assertAlmostEqual(truncate_bf16(1.1), 1.09375, places=7)
+ max_bf16 = struct.unpack('f', struct.pack('I', 0x7f7f0000))[0]
+ self.assertEqual(truncate_bf16(max_bf16), max_bf16)
+ self.assertEqual(truncate_bf16(min_bf16:=-max_bf16), min_bf16)
+ self.assertEqual(truncate_bf16(max_bf16 * 1.001), math.inf)
+
  class TestTypeSpec(unittest.TestCase):
  def setUp(self):
  self.old_default_int, self.old_default_float = dtypes.default_int, dtypes.default_float
@@ -796,7 +809,8 @@ class TestAutoCastType(unittest.TestCase):
  t.reshape(2, 1).expand(2, 10001).max().backward()
  np.testing.assert_allclose(t.grad.numpy(), [1, 0])

- @unittest.skipIf(Device.DEFAULT=="PYTHON", "very slow")
+ @unittest.skipIf(Device.DEFAULT == "PYTHON", "very slow")
+ @unittest.skipIf(Device.DEFAULT == "WEBGPU", "Binding size is larger than the maximum storage buffer binding size")
  @unittest.skipUnless(is_dtype_supported(dtypes.half), "need half")
  def test_mean_half_precision_underflow(self):
  N = 10000
@@ -812,6 +826,7 @@ class TestAutoCastType(unittest.TestCase):
  t.square().mean().backward()
  np.testing.assert_allclose(t.grad.numpy().flatten(), [60000 * 2 / (N*N)] * N*N)

+ @unittest.skipIf(Device.DEFAULT == "WEBGPU", "Precision error")
  @unittest.skipUnless(is_dtype_supported(dtypes.half), "need half")
  def test_softmax_dtype(self):
  data = [1, 2, 3]

{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_dtype_alu.py +1 -2
@@ -88,9 +88,8 @@ def universal_test_cast(a, in_dtype, dtype):
  numpy_value = np.array([a], dtype=_to_np_dtype(in_dtype)).astype(_to_np_dtype(dtype))
  np.testing.assert_equal(tensor_value.numpy(), numpy_value)

+ @unittest.skipIf(Device.DEFAULT == "WEBGPU", "Inf and nan cases are wrong on WebGPU")
  def universal_test_midcast(a, b, c, op1, op2, d1:DType, d2:DType):
- # the 'inf' and 'nan' cases are wrong on WEBGPU
- if (any(map(math.isnan, [a, b, c])) or math.isinf(c)) and Device.DEFAULT == "WEBGPU": return
  if not isinstance(op1, tuple): op1 = (op1, op1)
  if not isinstance(op2, tuple): op2 = (op2, op2)
  at, bt, ct = Tensor([a], dtype=d1), Tensor([b], dtype=d1), Tensor([c], dtype=d2)

{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_fuzz_shape_ops.py +1 -1
@@ -38,7 +38,7 @@ def apply(tor, ten, tor_fn, ten_fn=None):
  except: ten, ok = None, not ok # noqa: E722
  return tor, ten, ok

- @unittest.skipIf(CI and Device.DEFAULT == "CLANG", "slow")
+ @unittest.skipIf(CI and Device.DEFAULT in ("CPU", "NV"), "slow")
  class TestShapeOps(unittest.TestCase):
  @settings.get_profile(__file__)
  @given(st_shape(), st_int32, st.one_of(st_int32, st.lists(st_int32)))

{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_jit.py +5 -5
@@ -22,7 +22,7 @@ def _simple_test(add, extract=lambda x: x, N=10):
  class TestJit(unittest.TestCase):

  @settings(deadline=2e4)
- @unittest.skipUnless(Device.DEFAULT in ["LLVM", "CLANG"], f"no support on {Device.DEFAULT}")
+ @unittest.skipUnless(Device.DEFAULT in ["LLVM", "CPU"], f"no support on {Device.DEFAULT}")
  @given(strat.sampled_from([Tensor.exp2, Tensor.log2, Tensor.sin]))
  def test_approx_jit_timeout(self, op):
  with Context(TRANSCENDENTAL=2):
@@ -497,8 +497,8 @@ class TestCopyInsideJit(unittest.TestCase):
  @TinyJit
  def add(x,y) -> Tensor: return x.to(Device.DEFAULT)+y
  for _ in range(5):
- # create a Tensor in CLANG
- a = Tensor.rand(16,16,device="CLANG").realize()
+ # create a Tensor on CPU
+ a = Tensor.rand(16,16,device="CPU").realize()
  b = Tensor.rand(16,16).realize()
  out = add(a,b)
  np.testing.assert_allclose(out.flatten().tolist(), [x+y for x,y in zip(a.flatten().tolist(), b.flatten().tolist())])
@@ -529,12 +529,12 @@ class TestJitPrune(unittest.TestCase):
  w2_prune = TinyJit(w2, prune=True)

  for _ in range(3):
- a = Tensor.rand(16, device="CLANG").realize()
+ a = Tensor.rand(16, device="CPU").realize()
  out = w2_noprune(a)
  np.testing.assert_allclose(out.tolist(), [x*2+y for x,y in zip(weights.tolist(), a.tolist())])

  for _ in range(3):
- a = Tensor.rand(16, device="CLANG").realize()
+ a = Tensor.rand(16, device="CPU").realize()
  out = w2_prune(a)
  np.testing.assert_allclose(out.tolist(), [x*2+y for x,y in zip(weights.tolist(), a.tolist())])

{tinygrad-0.10.1 → tinygrad-0.10.2}/test/test_kernel_cache.py +4 -4
@@ -5,7 +5,7 @@ from tinygrad import Device

  class TestKernelCache(unittest.TestCase):
  def test_kernel_cache_in_action(self):
- if Device.DEFAULT not in ["CLANG"]:
+ if Device.DEFAULT not in ["CPU"]:
  self.skipTest("No custom kernel cache is implemented")

  unique_const = 0.6765677269
@@ -16,14 +16,14 @@ class TestKernelCache(unittest.TestCase):

  a1 = Tensor.rand(4,4).realize()
  b1 = Tensor.rand(4,4).realize()
- orig_compile_func = Device['CLANG'].compiler
- Device['CLANG'].compiler = None # making it not callable
+ orig_compile_func = Device['CPU'].compiler
+ Device['CPU'].compiler = None # making it not callable

  try:
  x1 = a1 + b1 + unique_const
  x1.realize() # Same kernel should be from cache.
  finally:
- Device['CLANG'].compiler = orig_compile_func
+ Device['CPU'].compiler = orig_compile_func

  if __name__ == "__main__":
  unittest.main()