tinygrad 0.9.0.tar.gz → 0.9.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. {tinygrad-0.9.0 → tinygrad-0.9.1}/PKG-INFO +14 -6
  2. {tinygrad-0.9.0 → tinygrad-0.9.1}/README.md +11 -4
  3. {tinygrad-0.9.0 → tinygrad-0.9.1}/setup.py +4 -2
  4. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_arange.py +3 -1
  5. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_const_folding.py +6 -1
  6. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_custom_function.py +5 -4
  7. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_device_speed.py +1 -1
  8. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_dtype.py +54 -10
  9. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_dtype_alu.py +9 -7
  10. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_fuzz_shape_ops.py +3 -2
  11. tinygrad-0.9.1/test/test_graph.py +235 -0
  12. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_jit.py +64 -2
  13. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_linearizer.py +533 -208
  14. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_linearizer_failures.py +19 -12
  15. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_linearizer_overflows.py +1 -1
  16. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_multitensor.py +148 -78
  17. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_nn.py +98 -62
  18. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_ops.py +194 -105
  19. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_optim.py +2 -1
  20. tinygrad-0.9.1/test/test_pattern_matcher.py +168 -0
  21. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_pickle.py +11 -1
  22. tinygrad-0.9.1/test/test_print_tree.py +66 -0
  23. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_randomness.py +12 -6
  24. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_schedule.py +339 -42
  25. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_search.py +3 -3
  26. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_subbuffer.py +3 -3
  27. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_symbolic_jit.py +62 -1
  28. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_symbolic_ops.py +37 -29
  29. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_symbolic_shapetracker.py +47 -1
  30. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_tensor.py +96 -58
  31. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_tensor_variable.py +23 -18
  32. tinygrad-0.9.1/test/test_uop_graph.py +190 -0
  33. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_uops.py +114 -40
  34. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_uops_stats.py +18 -20
  35. tinygrad-0.9.1/test/test_verify_lazyop.py +64 -0
  36. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_winograd.py +2 -1
  37. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/codegen/kernel.py +78 -90
  38. tinygrad-0.9.1/tinygrad/codegen/linearizer.py +528 -0
  39. tinygrad-0.9.1/tinygrad/codegen/uops.py +451 -0
  40. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/device.py +147 -10
  41. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/dtype.py +7 -7
  42. tinygrad-0.9.1/tinygrad/engine/__init__.py +0 -0
  43. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/engine/graph.py +16 -16
  44. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/engine/jit.py +39 -36
  45. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/engine/realize.py +6 -5
  46. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/engine/schedule.py +15 -7
  47. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/engine/search.py +6 -3
  48. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/function.py +17 -23
  49. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/helpers.py +77 -8
  50. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/lazy.py +26 -26
  51. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/multi.py +13 -9
  52. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/nn/__init__.py +1 -1
  53. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/nn/datasets.py +2 -1
  54. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/nn/state.py +3 -4
  55. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/ops.py +49 -16
  56. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/renderer/__init__.py +8 -4
  57. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/renderer/assembly.py +93 -100
  58. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/renderer/cstyle.py +47 -42
  59. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/renderer/llvmir.py +30 -30
  60. tinygrad-0.9.1/tinygrad/runtime/__init__.py +0 -0
  61. tinygrad-0.9.1/tinygrad/runtime/autogen/amd_gpu.py +13403 -0
  62. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/autogen/comgr.py +36 -10
  63. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/autogen/hsa.py +146 -14
  64. tinygrad-0.9.1/tinygrad/runtime/autogen/io_uring.py +1486 -0
  65. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/autogen/nv_gpu.py +269 -0
  66. tinygrad-0.9.1/tinygrad/runtime/driver/__init__.py +0 -0
  67. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/driver/hip_comgr.py +20 -11
  68. tinygrad-0.9.1/tinygrad/runtime/graph/__init__.py +0 -0
  69. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/graph/clang.py +3 -2
  70. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/graph/cuda.py +2 -2
  71. tinygrad-0.9.1/tinygrad/runtime/graph/hcq.py +187 -0
  72. tinygrad-0.9.1/tinygrad/runtime/ops_amd.py +550 -0
  73. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_cuda.py +3 -3
  74. tinygrad-0.9.1/tinygrad/runtime/ops_disk.py +125 -0
  75. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_gpu.py +2 -2
  76. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_metal.py +5 -6
  77. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_npy.py +1 -1
  78. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_nv.py +161 -166
  79. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_python.py +20 -16
  80. tinygrad-0.9.1/tinygrad/shape/__init__.py +0 -0
  81. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/shape/shapetracker.py +5 -2
  82. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/shape/symbolic.py +1 -3
  83. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/shape/view.py +34 -19
  84. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/tensor.py +219 -135
  85. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad.egg-info/PKG-INFO +14 -6
  86. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad.egg-info/SOURCES.txt +9 -3
  87. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad.egg-info/requires.txt +2 -1
  88. tinygrad-0.9.0/test/test_pattern_matcher.py +0 -93
  89. tinygrad-0.9.0/test/test_uop_graph.py +0 -82
  90. tinygrad-0.9.0/tinygrad/codegen/linearizer.py +0 -460
  91. tinygrad-0.9.0/tinygrad/codegen/uops.py +0 -415
  92. tinygrad-0.9.0/tinygrad/runtime/autogen/amd_gpu.py +0 -1900
  93. tinygrad-0.9.0/tinygrad/runtime/driver/hsa.py +0 -143
  94. tinygrad-0.9.0/tinygrad/runtime/graph/hcq.py +0 -143
  95. tinygrad-0.9.0/tinygrad/runtime/graph/hsa.py +0 -171
  96. tinygrad-0.9.0/tinygrad/runtime/ops_amd.py +0 -564
  97. tinygrad-0.9.0/tinygrad/runtime/ops_disk.py +0 -60
  98. tinygrad-0.9.0/tinygrad/runtime/ops_hsa.py +0 -278
  99. {tinygrad-0.9.0 → tinygrad-0.9.1}/LICENSE +0 -0
  100. {tinygrad-0.9.0 → tinygrad-0.9.1}/setup.cfg +0 -0
  101. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_assign.py +0 -0
  102. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_conv.py +0 -0
  103. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_conv_shapetracker.py +0 -0
  104. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_copy_speed.py +0 -0
  105. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_fusion_op.py +0 -0
  106. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_gc.py +0 -0
  107. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_image_dtype.py +0 -0
  108. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_kernel_cache.py +0 -0
  109. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_lazybuffer.py +0 -0
  110. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_lazyop.py +0 -0
  111. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_masked_st.py +0 -0
  112. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_method_cache.py +0 -0
  113. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_net_speed.py +0 -0
  114. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_sample.py +0 -0
  115. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_setitem.py +0 -0
  116. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_specific_conv.py +0 -0
  117. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_speed_v_torch.py +0 -0
  118. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_tensor_data.py +0 -0
  119. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_to_numpy.py +0 -0
  120. {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_zero_copy.py +0 -0
  121. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/__init__.py +0 -0
  122. {tinygrad-0.9.0/tinygrad/engine → tinygrad-0.9.1/tinygrad/codegen}/__init__.py +0 -0
  123. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/nn/optim.py +0 -0
  124. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/autogen/cuda.py +0 -0
  125. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/autogen/hip.py +0 -0
  126. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/autogen/kfd.py +0 -0
  127. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/autogen/opencl.py +0 -0
  128. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/graph/metal.py +0 -0
  129. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_clang.py +0 -0
  130. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_llvm.py +0 -0
  131. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad.egg-info/dependency_links.txt +0 -0
  132. {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad.egg-info/top_level.txt +0 -0

{tinygrad-0.9.0 → tinygrad-0.9.1}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: tinygrad
- Version: 0.9.0
+ Version: 0.9.1
  Summary: You like pytorch? You like micrograd? You love tinygrad! <3
  Author: George Hotz
  License: MIT
@@ -10,7 +10,6 @@ Requires-Python: >=3.8
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: numpy
- Requires-Dist: tqdm
  Requires-Dist: pyobjc-framework-Metal; platform_system == "Darwin"
  Requires-Dist: pyobjc-framework-libdispatch; platform_system == "Darwin"
  Provides-Extra: llvm
@@ -35,6 +34,7 @@ Requires-Dist: onnx==1.16.0; extra == "testing"
  Requires-Dist: onnx2torch; extra == "testing"
  Requires-Dist: opencv-python; extra == "testing"
  Requires-Dist: tabulate; extra == "testing"
+ Requires-Dist: tqdm; extra == "testing"
  Requires-Dist: safetensors; extra == "testing"
  Requires-Dist: transformers; extra == "testing"
  Requires-Dist: sentencepiece; extra == "testing"
@@ -43,6 +43,7 @@ Requires-Dist: librosa; extra == "testing"
  Requires-Dist: networkx; extra == "testing"
  Requires-Dist: hypothesis; extra == "testing"
  Requires-Dist: nibabel; extra == "testing"
+ Requires-Dist: bottle; extra == "testing"
  Provides-Extra: docs
  Requires-Dist: mkdocs-material; extra == "docs"
  Requires-Dist: mkdocstrings[python]; extra == "docs"
@@ -64,7 +65,7 @@ tinygrad: For something between [PyTorch](https://github.com/pytorch/pytorch) an

  <h3>

- [Homepage](https://github.com/tinygrad/tinygrad) | [Documentation](/docs) | [Examples](/examples) | [Showcase](/docs/showcase.md) | [Discord](https://discord.gg/ZjZadyC7PK)
+ [Homepage](https://github.com/tinygrad/tinygrad) | [Documentation](https://docs.tinygrad.org/) | [Discord](https://discord.gg/ZjZadyC7PK)

  </h3>

@@ -139,7 +140,8 @@ tinygrad already supports numerous accelerators, including:
  - [x] [LLVM](tinygrad/runtime/ops_llvm.py)
  - [x] [METAL](tinygrad/runtime/ops_metal.py)
  - [x] [CUDA](tinygrad/runtime/ops_cuda.py)
- - [x] [HSA](tinygrad/runtime/ops_hsa.py)
+ - [x] [AMD](tinygrad/runtime/ops_amd.py)
+ - [x] [NV](tinygrad/runtime/ops_nv.py)

  And it is easy to add more! Your accelerator of choice only needs to support a total of ~25 low level ops.

@@ -163,7 +165,7 @@ python3 -m pip install git+https://github.com/tinygrad/tinygrad.git

  ## Documentation

- Documentation along with a quick start guide can be found in the [docs/](/docs) directory.
+ Documentation along with a quick start guide can be found on the [docs website](https://docs.tinygrad.org/) built from the [docs/](/docs) directory.

  ### Quick example comparing to PyTorch

@@ -209,7 +211,7 @@ Now, what we want:
  - Bug fixes (with a regression test) are great! This library isn't 1.0 yet, so if you stumble upon a bug, fix it, write a test, and submit a PR, this is valuable work.
  - Solving bounties! tinygrad [offers cash bounties](https://docs.google.com/spreadsheets/d/1WKHbT-7KOgjEawq5h5Ic1qUWzpfAzuD_J06N1JwOCGs/edit?usp=sharing) for certain improvements to the library. All new code should be high quality and well tested.
  - Features. However, if you are adding a feature, consider the line tradeoff. If it's 3 lines, there's less of a bar of usefulness it has to meet over something that's 30 or 300 lines. All features must have regression tests. In general with no other constraints, your feature's API should match torch or numpy.
- - Refactors that are clear wins. In general, if your refactor isn't a clear win it will be closed. But some refactors are amazing! Think about readability in a deep core sense. A whitespace change or moving a few functions around is useless, but if you realize that two 100 line functions can actually use the same 110 line function with arguments while also improving readability, this is a big win.
+ - Refactors that are clear wins. In general, if your refactor isn't a clear win it will be closed. But some refactors are amazing! Think about readability in a deep core sense. A whitespace change or moving a few functions around is useless, but if you realize that two 100 line functions can actually use the same 110 line function with arguments while also improving readability, this is a big win. Refactors should pass [process replay](#process-replay-tests).
  - Tests/fuzzers. If you can add tests that are non brittle, they are welcome. We have some fuzzers in here too, and there's a plethora of bugs that can be found with them and by improving them. Finding bugs, even writing broken tests (that should pass) with `@unittest.expectedFailure` is great. This is how we make progress.
  - Dead code removal from core `tinygrad/` folder. We don't care about the code in extra, but removing dead code from the core library is great. Less for new people to read and be confused by.

@@ -225,3 +227,9 @@ python3 -m pip install -e '.[testing]' # install extra deps for testing
  python3 test/test_ops.py # just the ops tests
  python3 -m pytest test/ # whole test suite
  ```
+
+ #### Process replay tests
+
+ [Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/process_replay.py) detects changes in the generated kernels of CI tests by comparing them against tinygrad master. If your PR is a refactor or speedup without any expected behavior change, it should include a green process replay pass to get merged.
+
+ You can enable process replay by adding [run_process_replay] to your PR title. [example](https://github.com/tinygrad/tinygrad/pull/4995). Note that you should keep your branch up-to-date with master.

{tinygrad-0.9.0 → tinygrad-0.9.1}/README.md

@@ -9,7 +9,7 @@ tinygrad: For something between [PyTorch](https://github.com/pytorch/pytorch) an

  <h3>

- [Homepage](https://github.com/tinygrad/tinygrad) | [Documentation](/docs) | [Examples](/examples) | [Showcase](/docs/showcase.md) | [Discord](https://discord.gg/ZjZadyC7PK)
+ [Homepage](https://github.com/tinygrad/tinygrad) | [Documentation](https://docs.tinygrad.org/) | [Discord](https://discord.gg/ZjZadyC7PK)

  </h3>

@@ -84,7 +84,8 @@ tinygrad already supports numerous accelerators, including:
  - [x] [LLVM](tinygrad/runtime/ops_llvm.py)
  - [x] [METAL](tinygrad/runtime/ops_metal.py)
  - [x] [CUDA](tinygrad/runtime/ops_cuda.py)
- - [x] [HSA](tinygrad/runtime/ops_hsa.py)
+ - [x] [AMD](tinygrad/runtime/ops_amd.py)
+ - [x] [NV](tinygrad/runtime/ops_nv.py)

  And it is easy to add more! Your accelerator of choice only needs to support a total of ~25 low level ops.

@@ -108,7 +109,7 @@ python3 -m pip install git+https://github.com/tinygrad/tinygrad.git

  ## Documentation

- Documentation along with a quick start guide can be found in the [docs/](/docs) directory.
+ Documentation along with a quick start guide can be found on the [docs website](https://docs.tinygrad.org/) built from the [docs/](/docs) directory.

  ### Quick example comparing to PyTorch

@@ -154,7 +155,7 @@ Now, what we want:
  - Bug fixes (with a regression test) are great! This library isn't 1.0 yet, so if you stumble upon a bug, fix it, write a test, and submit a PR, this is valuable work.
  - Solving bounties! tinygrad [offers cash bounties](https://docs.google.com/spreadsheets/d/1WKHbT-7KOgjEawq5h5Ic1qUWzpfAzuD_J06N1JwOCGs/edit?usp=sharing) for certain improvements to the library. All new code should be high quality and well tested.
  - Features. However, if you are adding a feature, consider the line tradeoff. If it's 3 lines, there's less of a bar of usefulness it has to meet over something that's 30 or 300 lines. All features must have regression tests. In general with no other constraints, your feature's API should match torch or numpy.
- - Refactors that are clear wins. In general, if your refactor isn't a clear win it will be closed. But some refactors are amazing! Think about readability in a deep core sense. A whitespace change or moving a few functions around is useless, but if you realize that two 100 line functions can actually use the same 110 line function with arguments while also improving readability, this is a big win.
+ - Refactors that are clear wins. In general, if your refactor isn't a clear win it will be closed. But some refactors are amazing! Think about readability in a deep core sense. A whitespace change or moving a few functions around is useless, but if you realize that two 100 line functions can actually use the same 110 line function with arguments while also improving readability, this is a big win. Refactors should pass [process replay](#process-replay-tests).
  - Tests/fuzzers. If you can add tests that are non brittle, they are welcome. We have some fuzzers in here too, and there's a plethora of bugs that can be found with them and by improving them. Finding bugs, even writing broken tests (that should pass) with `@unittest.expectedFailure` is great. This is how we make progress.
  - Dead code removal from core `tinygrad/` folder. We don't care about the code in extra, but removing dead code from the core library is great. Less for new people to read and be confused by.

@@ -170,3 +171,9 @@ python3 -m pip install -e '.[testing]' # install extra deps for testing
  python3 test/test_ops.py # just the ops tests
  python3 -m pytest test/ # whole test suite
  ```
+
+ #### Process replay tests
+
+ [Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/process_replay.py) detects changes in the generated kernels of CI tests by comparing them against tinygrad master. If your PR is a refactor or speedup without any expected behavior change, it should include a green process replay pass to get merged.
+
+ You can enable process replay by adding [run_process_replay] to your PR title. [example](https://github.com/tinygrad/tinygrad/pull/4995). Note that you should keep your branch up-to-date with master.

{tinygrad-0.9.0 → tinygrad-0.9.1}/setup.py

@@ -8,7 +8,7 @@ with open(directory / 'README.md', encoding='utf-8') as f:
  long_description = f.read()

  setup(name='tinygrad',
- version='0.9.0',
+ version='0.9.1',
  description='You like pytorch? You like micrograd? You love tinygrad! <3',
  author='George Hotz',
  license='MIT',
@@ -20,7 +20,7 @@ setup(name='tinygrad',
  "Programming Language :: Python :: 3",
  "License :: OSI Approved :: MIT License"
  ],
- install_requires=["numpy", "tqdm",
+ install_requires=["numpy",
  "pyobjc-framework-Metal; platform_system=='Darwin'",
  "pyobjc-framework-libdispatch; platform_system=='Darwin'"],
  python_requires='>=3.8',
@@ -46,6 +46,7 @@ setup(name='tinygrad',
  "onnx2torch",
  "opencv-python",
  "tabulate",
+ "tqdm",
  "safetensors",
  "transformers",
  "sentencepiece",
@@ -54,6 +55,7 @@ setup(name='tinygrad',
  "networkx",
  "hypothesis",
  "nibabel",
+ "bottle",
  ],
  'docs': [
  "mkdocs-material",

{tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_arange.py

@@ -1,10 +1,12 @@
  import unittest
  from tinygrad import Tensor, GlobalCounters
+ from tinygrad.helpers import Context

  class TestArange(unittest.TestCase):
  def _get_flops(self, N):
  GlobalCounters.reset()
- Tensor.arange(N).realize()
+ with Context(NOOPT=1):
+ Tensor.arange(N).realize()
  return GlobalCounters.global_ops

  def test_complexity(self):
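
Note: `Context` (from `tinygrad.helpers`, as imported above) applies flag overrides such as `NOOPT` only inside the `with` block and restores the previous values on exit. A minimal sketch of the same pattern outside the test, assuming a local tinygrad install:

```python
from tinygrad import Tensor, GlobalCounters
from tinygrad.helpers import Context

GlobalCounters.reset()
with Context(NOOPT=1):             # NOOPT is only set inside this block
  Tensor.arange(16).realize()      # the arange kernel runs without the optional optimizations
print(GlobalCounters.global_ops)   # op count for the unoptimized kernel
```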

{tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_const_folding.py

@@ -28,6 +28,11 @@ class TestUnaryOpsConstFolding(unittest.TestCase):
  _check_ast_count(0, Tensor([1, 2, 3]).neg().mul(-1))
  _check_ast_count(0, Tensor([1, 2, 3]).neg().neg())

+ def test_neg_realized_no_fold(self):
+ x = Tensor.randn(32, 32)
+ x = x.clip(0, 1).realize()
+ _check_ast_count(1, x.neg())
+
  class TestBinaryOpsConstFolding(unittest.TestCase):
  def test_add_literal_zero(self):
  _check_ast_count(0, Tensor([1.0, 2, 3, 4]) + 0)
@@ -250,4 +255,4 @@ class TestTautologicalCompare(unittest.TestCase):
  np.testing.assert_equal((a != a).numpy(), [True, False, False])

  if __name__ == '__main__':
- unittest.main()
+ unittest.main()

{tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_custom_function.py

@@ -31,7 +31,7 @@ def atan2_cpu(ret:Buffer, a:Buffer, b:Buffer): ret.copyin(np.require(np.arctan2(
  # NOTE: The derivative of atan2 doesn't need a custom op! https://www.liquisearch.com/atan2/derivative
  # In general, it is also optional to write a backward function, just your backward pass won't work without it

- from tinygrad.ops import LoadOps, BinaryOps
+ from tinygrad.ops import LoadOps, BinaryOps, UnaryOps
  from tinygrad.lazy import LazyBuffer
  from tinygrad.tensor import Function

@@ -42,9 +42,10 @@ class ATan2(Function):
  return create_lazybuffer(a.device, ShapeTracker.from_shape(a.shape), max(a.dtype, b.dtype), LoadOps.CUSTOM,
  arg={"GPU": atan2_gpu, "CPU": atan2_cpu}[a.device], srcs=(a.contiguous(), b.contiguous()))
  def backward(self, grad_output:LazyBuffer) -> Tuple[Optional[LazyBuffer], Optional[LazyBuffer]]:
- denom = (self.a.e(BinaryOps.MUL, self.a)).e(BinaryOps.ADD, self.b.e(BinaryOps.MUL, self.b))
- return grad_output.e(BinaryOps.MUL, self.b.e(BinaryOps.DIV, denom)) if self.needs_input_grad[0] else None, \
- grad_output.e(BinaryOps.MUL, self.a.const(0).e(BinaryOps.SUB, self.a).e(BinaryOps.DIV, denom)) if self.needs_input_grad[1] else None
+ recip = (self.a.e(BinaryOps.MUL, self.a)).e(BinaryOps.ADD, self.b.e(BinaryOps.MUL, self.b)).e(UnaryOps.RECIP)
+ return grad_output.e(BinaryOps.MUL, self.b.e(BinaryOps.MUL, recip)) if self.needs_input_grad[0] else None, \
+ grad_output.e(BinaryOps.MUL, self.a.const(0).e(BinaryOps.ADD, self.a.e(UnaryOps.NEG)).e(BinaryOps.MUL, recip)) \
+ if self.needs_input_grad[1] else None

  # *** third, we use our lovely new mlop in some tests ***
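
For reference, the rewritten backward above only changes how the expression is built (one shared reciprocal plus NEG instead of DIV and SUB); the atan2 gradients it encodes are unchanged:

```latex
\frac{\partial}{\partial a}\,\operatorname{atan2}(a,b) = \frac{b}{a^2+b^2}
\qquad
\frac{\partial}{\partial b}\,\operatorname{atan2}(a,b) = \frac{-a}{a^2+b^2}
```

Here `recip` corresponds to 1/(a²+b²), so the two returned terms are grad·b·recip and grad·(−a)·recip, the same values the old DIV-based code produced.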

{tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_device_speed.py

@@ -7,7 +7,7 @@ class TestDeviceSpeed(unittest.TestCase):
  @classmethod
  def setUpClass(cls):
  cls.dev = Device[Device.DEFAULT]
- cls.empty = Device[Device.DEFAULT].renderer.render("test", UOpGraph())
+ cls.empty = Device[Device.DEFAULT].renderer.render("test", UOpGraph([]))

  def test_empty_compile(self):
  with Timing("compiler "):

{tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_dtype.py

@@ -1,14 +1,15 @@
- import unittest, operator, subprocess
+ import unittest, operator, subprocess, math
  import numpy as np
  import torch
  from typing import Any, List
  from tinygrad.helpers import getenv, DEBUG, CI
  from tinygrad.dtype import DType, DTYPES_DICT, ImageDType, PtrDType, least_upper_float, least_upper_dtype
  from tinygrad import Device, Tensor, dtypes
+ from tinygrad.tensor import _to_np_dtype
  from hypothesis import given, settings, strategies as strat
  from test.helpers import is_dtype_supported, rand_for_dtype

- settings.register_profile("my_profile", max_examples=200, deadline=None)
+ settings.register_profile("my_profile", max_examples=200, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))
  settings.load_profile("my_profile")

  core_dtypes = list(DTYPES_DICT.values())
@@ -51,10 +52,10 @@ def _test_cast(a:Tensor, target_dtype:DType):
  # TODO: cast between double and half are broken https://github.com/tinygrad/tinygrad/issues/4084
  return

- _test_op(lambda: a.cast(target_dtype), target_dtype, list(a.numpy().astype(target_dtype.np)))
+ _test_op(lambda: a.cast(target_dtype), target_dtype, list(a.numpy().astype(_to_np_dtype(target_dtype))))
  def _test_bitcast(a:Tensor, target_dtype:DType, target=None):
  if target_dtype == dtypes.bfloat16: raise unittest.SkipTest("no test for bf16 bitcast yet")
- _test_op(lambda: a.bitcast(target_dtype), target_dtype, target or a.numpy().view(target_dtype.np).tolist())
+ _test_op(lambda: a.bitcast(target_dtype), target_dtype, target or a.numpy().view(_to_np_dtype(target_dtype)).tolist())

  class TestDType(unittest.TestCase):
  DTYPE: Any = None
@@ -66,7 +67,8 @@ class TestDType(unittest.TestCase):
  def setUp(self):
  if self.DTYPE is None: raise unittest.SkipTest("base class")

- def test_to_np(self): _test_to_np(Tensor(self.DATA, dtype=self.DTYPE), self.DTYPE.np, np.array(self.DATA, dtype=self.DTYPE.np))
+ def test_to_np(self):
+ _test_to_np(Tensor(self.DATA, dtype=self.DTYPE), _to_np_dtype(self.DTYPE), np.array(self.DATA, dtype=_to_np_dtype(self.DTYPE)))

  def test_casts_to(self): list(map(
  lambda dtype: _test_cast(Tensor(self.DATA, dtype=dtype), self.DTYPE),
@@ -104,13 +106,13 @@ class TestDType(unittest.TestCase):
  def test_dtypes_fields(self):
  fields = dtypes.fields()
  self.assertTrue(all(isinstance(value, DType) for value in fields.values()))
- self.assertTrue(all(issubclass(value.np, np.generic) for value in fields.values() if value.np is not None))
+ self.assertTrue(all(issubclass(_to_np_dtype(value), np.generic) for value in fields.values() if _to_np_dtype(value) is not None))

  def test_resulting_and_init_dtypes_match(self):
  dtypes = list(map(np.dtype, ["bool", "uint8", "int8", "int16", "int32", "int64", "float32", "float64"]))
  data = [1., 2., 0., 0.5, -1.5, 5.25]
  for dt in dtypes:
- arr = np.asarray(data, dtype=dt)
+ arr = np.asarray(data).astype(dt)
  tin = Tensor(arr).numpy()
  tor = torch.as_tensor(arr).detach().numpy()
  assert dt == tin.dtype == tor.dtype, f"dtype mismatch: expected={dt} | tinygrad={tin.dtype} | torch={tor.dtype}"
@@ -329,6 +331,22 @@ class TestHelpers(unittest.TestCase):
  def test_scalar(self, dtype, amt):
  assert dtype.vec(amt).scalar() == dtype

+ def test_from_py(self):
+ assert dtypes.from_py(True) == dtypes.bool
+ assert dtypes.from_py(2) == dtypes.default_int
+ assert dtypes.from_py(3.0) == dtypes.default_float
+ assert dtypes.from_py([]) == dtypes.default_float
+ assert dtypes.from_py(()) == dtypes.default_float
+ assert dtypes.from_py([True]) == dtypes.bool
+ assert dtypes.from_py([True, 2]) == dtypes.default_int
+ assert dtypes.from_py([True, 3.0]) == dtypes.default_float
+ assert dtypes.from_py([2, 3.0]) == dtypes.default_float
+ assert dtypes.from_py([True, 2, 3.0]) == dtypes.default_float
+ with self.assertRaises(RuntimeError): dtypes.from_py(None)
+ with self.assertRaises(RuntimeError): dtypes.from_py([None])
+ with self.assertRaises(RuntimeError): dtypes.from_py({})
+ with self.assertRaises(RuntimeError): dtypes.from_py(set())
+
  class TestTypeSpec(unittest.TestCase):
  def setUp(self):
  self.old_default_int, self.old_default_float = dtypes.default_int, dtypes.default_float
@@ -446,6 +464,18 @@ class TestTypeSpec(unittest.TestCase):
  assert X_data.gather(0, indices).dtype == X_data.dtype
  assert X_data.gather(1, indices).dtype == X_data.dtype

+ @given(strat.sampled_from(dtype_floats), strat.sampled_from(dtype_floats))
+ def test_attention_returns_same_dtype(self, data_dtype, default_float):
+ dtypes.default_float = default_float
+ query = Tensor.rand(32, 8, 128, 64, dtype=data_dtype)
+ key = Tensor.rand(32, 8, 128, 64, dtype=data_dtype)
+ value = Tensor.rand(32, 8, 128, 64, dtype=data_dtype)
+ mask = (Tensor.rand(32, 8, 128, 128) < 0.5)
+ assert query.scaled_dot_product_attention(key, value, is_causal=True).dtype == data_dtype
+ assert query.scaled_dot_product_attention(key, value, is_causal=True, dropout_p=0.3).dtype == data_dtype
+ assert query.scaled_dot_product_attention(key, value, is_causal=False).dtype == data_dtype
+ assert query.scaled_dot_product_attention(key, value, attn_mask=mask).dtype == data_dtype
+
  class TestTypePromotion(unittest.TestCase):
  @given(strat.sampled_from(core_dtypes))
  def test_self_promo_to_self(self, dtype):
@@ -526,6 +556,16 @@ class TestAutoCastType(unittest.TestCase):
  assert (Tensor([0, 1], dtype=dtypes.float32)).sum().dtype == dtypes.float32
  assert (Tensor([0, 1], dtype=dtypes.float64)).sum().dtype == dtypes.float64

+ @unittest.skipUnless(is_dtype_supported(dtypes.float16), "need float16")
+ def test_sum_acc_dtype(self):
+ t = Tensor([40000, 40000], dtype=dtypes.float16)
+ # default float16 sum returns in float16, overflowed in this case
+ assert t.sum().dtype == dtypes.float16
+ assert math.isinf(t.sum().numpy().item())
+ # specifiying acc_dtype and it's not downcasted
+ assert t.sum(acc_dtype=dtypes.float32).dtype == dtypes.float32
+ np.testing.assert_allclose(t.sum(acc_dtype=dtypes.float32).numpy(), 80000)
+
  def test_mean(self):
  assert (Tensor([0, 1], dtype=dtypes.bool)).mean().dtype == dtypes.float32
  assert (Tensor([0, 1], dtype=dtypes.int8)).mean().dtype == dtypes.float32
@@ -556,9 +596,13 @@ class TestAutoCastType(unittest.TestCase):
  assert (Tensor([0, 1], dtype=dtypes.float32)).cumsum(0).dtype == dtypes.float32
  assert (Tensor([0, 1], dtype=dtypes.float64)).cumsum(0).dtype == dtypes.float64

- @given(strat.sampled_from(core_dtypes), strat.sampled_from(core_dtypes))
- def test_matmul(self, dt1, dt2):
- assert (Tensor([0, 1], dtype=dt1) @ Tensor([0, 1], dtype=dt2)).dtype == least_upper_dtype(dt1, dt2)
+ @given(strat.sampled_from(core_dtypes), strat.sampled_from(core_dtypes), strat.sampled_from(core_dtypes))
+ def test_matmul(self, dt1, dt2, acc_dt):
+ t1 = Tensor([0, 1], dtype=dt1)
+ t2 = Tensor([0, 1], dtype=dt2)
+ assert (t1 @ t2).dtype == least_upper_dtype(dt1, dt2)
+ # if acc_dtype is specified, return in acc_dtype
+ assert (t1.matmul(t2, acc_dtype=acc_dt).dtype == acc_dt)

  @staticmethod
  def check_where_alternate_input_other(input_, other, data_type):
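
On the new `test_sum_acc_dtype` above: float16 cannot represent 80000 (its largest finite value is 65504), so summing `[40000, 40000]` with a float16 accumulator overflows to inf, while passing `acc_dtype` keeps the accumulation wider. A minimal sketch, assuming a backend with float16 support:

```python
from tinygrad import Tensor, dtypes

t = Tensor([40000, 40000], dtype=dtypes.float16)
print(t.sum().numpy())                          # inf: the float16 accumulator overflows
print(t.sum(acc_dtype=dtypes.float32).numpy())  # 80000.0, and the result dtype is float32
```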

{tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_dtype_alu.py

@@ -9,9 +9,10 @@ from tinygrad.helpers import CI, getenv
  from tinygrad.engine.schedule import create_schedule
  from tinygrad.engine.realize import run_schedule
  from tinygrad.ops import UnaryOps
+ from tinygrad.tensor import _to_np_dtype
  from test.helpers import is_dtype_supported

- settings.register_profile("my_profile", max_examples=200, deadline=None)
+ settings.register_profile("my_profile", max_examples=200, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))
  settings.load_profile("my_profile")
  print(settings.default)

@@ -59,7 +60,7 @@ class ht:
  def universal_test(a, b, dtype, op):
  if not isinstance(op, tuple): op = (op, op)
  tensor_value = (op[0](Tensor([a], dtype=dtype), Tensor([b], dtype=dtype))).numpy()
- numpy_value = op[1](np.array([a]).astype(dtype.np), np.array([b]).astype(dtype.np))
+ numpy_value = op[1](np.array([a]).astype(_to_np_dtype(dtype)), np.array([b]).astype(_to_np_dtype(dtype)))
  if dtype in dtypes_float: np.testing.assert_allclose(tensor_value, numpy_value, atol=1e-10)
  else: np.testing.assert_equal(tensor_value, numpy_value)

@@ -70,7 +71,7 @@ def universal_test_unary(a, dtype, op):
  ast = sched[-1].ast[0]
  run_schedule(sched)
  tensor_value = out.numpy()
- numpy_value = op[1](np.array([a]).astype(dtype.np))
+ numpy_value = op[1](np.array([a]).astype(_to_np_dtype(dtype)))
  if dtype in dtypes_float:
  np.testing.assert_allclose(tensor_value, numpy_value, atol=1e-3, rtol=1e-2)
  else: np.testing.assert_equal(tensor_value, numpy_value)
@@ -80,16 +81,16 @@ def universal_test_unary(a, dtype, op):

  def universal_test_cast(a, in_dtype, dtype):
  tensor_value = Tensor([a], dtype=in_dtype).cast(dtype)
- numpy_value = np.array([a]).astype(dtype.np)
+ numpy_value = np.array([a]).astype(_to_np_dtype(dtype))
  np.testing.assert_equal(tensor_value.numpy(), numpy_value)

  def universal_test_midcast(a, b, c, op1, op2, d1:DType, d2:DType):
  if not isinstance(op1, tuple): op1 = (op1, op1)
  if not isinstance(op2, tuple): op2 = (op2, op2)
  at, bt, ct = Tensor([a], dtype=d1), Tensor([b], dtype=d1), Tensor([c], dtype=d2)
- an, bn, cn = np.array([a]).astype(d1.np), np.array([b]).astype(d1.np), np.array([c]).astype(d2.np)
+ an, bn, cn = np.array([a]).astype(_to_np_dtype(d1)), np.array([b]).astype(_to_np_dtype(d1)), np.array([c]).astype(_to_np_dtype(d2))
  tensor_value = op2[0](op1[0](at, bt).cast(d2), ct).numpy()
- numpy_value = op2[1](op1[1](an, bn).astype(d2.np), cn)
+ numpy_value = op2[1](op1[1](an, bn).astype(_to_np_dtype(d2)), cn)
  np.testing.assert_allclose(tensor_value, numpy_value, rtol=1e-6 if getenv("PTX") else 1e-7)

  class TestDTypeALU(unittest.TestCase):
@@ -145,10 +146,11 @@ class TestDTypeALU(unittest.TestCase):
  def test_int32_midcast_float(self, a, b, c, op1, op2): universal_test_midcast(a, b, c, op1, op2, dtypes.int32, dtypes.float32)

  # Metal and CUDACPU and HIP behave differently than numpy in CI for overflows
- skip_overflow = CI and (Device.DEFAULT in {"HSA", "AMD", "NV"} or getenv("CUDACPU"))
+ skip_overflow = CI and (Device.DEFAULT in {"AMD", "NV"} or getenv("CUDACPU"))
  @given(strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
  strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
  ht.int32, strat.sampled_from(binary_operations), strat.sampled_from(integer_binary_operations))
+ @unittest.skipIf(Device.DEFAULT == "PYTHON", "TODO: fix cast inf to int32 in PYTHON")
  def test_float_midcast_int32(self, a, b, c, op1, op2): universal_test_midcast(a, b, c, op1, op2, dtypes.float32, dtypes.int32)

  @unittest.skip("broken. TODO: fix it")
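
A recurring change in these test diffs is replacing the removed `dtype.np` attribute with the `_to_np_dtype` helper imported from `tinygrad.tensor`. As a rough illustration only (not the actual tinygrad implementation), such a helper simply maps a tinygrad `DType` to the matching numpy scalar type:

```python
import numpy as np
from tinygrad import dtypes

# hypothetical lookup table for illustration; the real helper covers every dtype
_NP_MAP = {dtypes.bool: np.bool_, dtypes.int32: np.int32, dtypes.float32: np.float32}

def to_np_dtype_sketch(dtype):
  """Return the numpy scalar type for a tinygrad DType, or None if there is no equivalent."""
  return _NP_MAP.get(dtype)

assert np.array([1, 2]).astype(to_np_dtype_sketch(dtypes.float32)).dtype == np.float32
```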

{tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_fuzz_shape_ops.py

@@ -1,3 +1,4 @@
+ from __future__ import annotations
  import unittest
  from math import prod

@@ -7,11 +8,11 @@ from hypothesis.extra import numpy as stn
  import numpy as np
  import torch
  from tinygrad import Tensor, Device
- from tinygrad.helpers import CI
+ from tinygrad.helpers import CI, getenv


  settings.register_profile(__file__, settings.default,
- max_examples=100 if CI else 250, deadline=None)
+ max_examples=100 if CI else 250, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))


  # torch wraparound for large numbers
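
The `derandomize=getenv("DERANDOMIZE_CI", False)` additions to the hypothesis profiles in these test files make property-based runs reproducible in CI: with `derandomize=True`, hypothesis generates examples deterministically instead of from a random seed. A small standalone sketch (profile and test names are illustrative):

```python
from hypothesis import given, settings, strategies as strat

# derandomize=True trades random exploration for reproducible CI runs
settings.register_profile("ci", max_examples=50, deadline=None, derandomize=True)
settings.load_profile("ci")

@given(strat.integers())
def test_int_str_roundtrip(x):
  assert int(str(x)) == x
```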