tinygrad 0.9.0__tar.gz → 0.9.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tinygrad-0.9.0 → tinygrad-0.9.1}/PKG-INFO +14 -6
- {tinygrad-0.9.0 → tinygrad-0.9.1}/README.md +11 -4
- {tinygrad-0.9.0 → tinygrad-0.9.1}/setup.py +4 -2
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_arange.py +3 -1
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_const_folding.py +6 -1
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_custom_function.py +5 -4
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_device_speed.py +1 -1
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_dtype.py +54 -10
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_dtype_alu.py +9 -7
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_fuzz_shape_ops.py +3 -2
- tinygrad-0.9.1/test/test_graph.py +235 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_jit.py +64 -2
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_linearizer.py +533 -208
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_linearizer_failures.py +19 -12
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_linearizer_overflows.py +1 -1
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_multitensor.py +148 -78
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_nn.py +98 -62
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_ops.py +194 -105
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_optim.py +2 -1
- tinygrad-0.9.1/test/test_pattern_matcher.py +168 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_pickle.py +11 -1
- tinygrad-0.9.1/test/test_print_tree.py +66 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_randomness.py +12 -6
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_schedule.py +339 -42
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_search.py +3 -3
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_subbuffer.py +3 -3
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_symbolic_jit.py +62 -1
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_symbolic_ops.py +37 -29
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_symbolic_shapetracker.py +47 -1
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_tensor.py +96 -58
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_tensor_variable.py +23 -18
- tinygrad-0.9.1/test/test_uop_graph.py +190 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_uops.py +114 -40
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_uops_stats.py +18 -20
- tinygrad-0.9.1/test/test_verify_lazyop.py +64 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_winograd.py +2 -1
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/codegen/kernel.py +78 -90
- tinygrad-0.9.1/tinygrad/codegen/linearizer.py +528 -0
- tinygrad-0.9.1/tinygrad/codegen/uops.py +451 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/device.py +147 -10
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/dtype.py +7 -7
- tinygrad-0.9.1/tinygrad/engine/__init__.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/engine/graph.py +16 -16
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/engine/jit.py +39 -36
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/engine/realize.py +6 -5
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/engine/schedule.py +15 -7
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/engine/search.py +6 -3
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/function.py +17 -23
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/helpers.py +77 -8
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/lazy.py +26 -26
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/multi.py +13 -9
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/nn/__init__.py +1 -1
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/nn/datasets.py +2 -1
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/nn/state.py +3 -4
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/ops.py +49 -16
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/renderer/__init__.py +8 -4
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/renderer/assembly.py +93 -100
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/renderer/cstyle.py +47 -42
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/renderer/llvmir.py +30 -30
- tinygrad-0.9.1/tinygrad/runtime/__init__.py +0 -0
- tinygrad-0.9.1/tinygrad/runtime/autogen/amd_gpu.py +13403 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/autogen/comgr.py +36 -10
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/autogen/hsa.py +146 -14
- tinygrad-0.9.1/tinygrad/runtime/autogen/io_uring.py +1486 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/autogen/nv_gpu.py +269 -0
- tinygrad-0.9.1/tinygrad/runtime/driver/__init__.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/driver/hip_comgr.py +20 -11
- tinygrad-0.9.1/tinygrad/runtime/graph/__init__.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/graph/clang.py +3 -2
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/graph/cuda.py +2 -2
- tinygrad-0.9.1/tinygrad/runtime/graph/hcq.py +187 -0
- tinygrad-0.9.1/tinygrad/runtime/ops_amd.py +550 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_cuda.py +3 -3
- tinygrad-0.9.1/tinygrad/runtime/ops_disk.py +125 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_gpu.py +2 -2
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_metal.py +5 -6
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_npy.py +1 -1
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_nv.py +161 -166
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_python.py +20 -16
- tinygrad-0.9.1/tinygrad/shape/__init__.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/shape/shapetracker.py +5 -2
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/shape/symbolic.py +1 -3
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/shape/view.py +34 -19
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/tensor.py +219 -135
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad.egg-info/PKG-INFO +14 -6
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad.egg-info/SOURCES.txt +9 -3
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad.egg-info/requires.txt +2 -1
- tinygrad-0.9.0/test/test_pattern_matcher.py +0 -93
- tinygrad-0.9.0/test/test_uop_graph.py +0 -82
- tinygrad-0.9.0/tinygrad/codegen/linearizer.py +0 -460
- tinygrad-0.9.0/tinygrad/codegen/uops.py +0 -415
- tinygrad-0.9.0/tinygrad/runtime/autogen/amd_gpu.py +0 -1900
- tinygrad-0.9.0/tinygrad/runtime/driver/hsa.py +0 -143
- tinygrad-0.9.0/tinygrad/runtime/graph/hcq.py +0 -143
- tinygrad-0.9.0/tinygrad/runtime/graph/hsa.py +0 -171
- tinygrad-0.9.0/tinygrad/runtime/ops_amd.py +0 -564
- tinygrad-0.9.0/tinygrad/runtime/ops_disk.py +0 -60
- tinygrad-0.9.0/tinygrad/runtime/ops_hsa.py +0 -278
- {tinygrad-0.9.0 → tinygrad-0.9.1}/LICENSE +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/setup.cfg +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_assign.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_conv.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_conv_shapetracker.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_copy_speed.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_fusion_op.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_gc.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_image_dtype.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_kernel_cache.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_lazybuffer.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_lazyop.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_masked_st.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_method_cache.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_net_speed.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_sample.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_setitem.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_specific_conv.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_speed_v_torch.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_tensor_data.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_to_numpy.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/test/test_zero_copy.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/__init__.py +0 -0
- {tinygrad-0.9.0/tinygrad/engine → tinygrad-0.9.1/tinygrad/codegen}/__init__.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/nn/optim.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/autogen/cuda.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/autogen/hip.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/autogen/kfd.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/autogen/opencl.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/graph/metal.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_clang.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad/runtime/ops_llvm.py +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad.egg-info/dependency_links.txt +0 -0
- {tinygrad-0.9.0 → tinygrad-0.9.1}/tinygrad.egg-info/top_level.txt +0 -0
--- tinygrad-0.9.0/PKG-INFO
+++ tinygrad-0.9.1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tinygrad
-Version: 0.9.0
+Version: 0.9.1
 Summary: You like pytorch? You like micrograd? You love tinygrad! <3
 Author: George Hotz
 License: MIT
@@ -10,7 +10,6 @@ Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy
-Requires-Dist: tqdm
 Requires-Dist: pyobjc-framework-Metal; platform_system == "Darwin"
 Requires-Dist: pyobjc-framework-libdispatch; platform_system == "Darwin"
 Provides-Extra: llvm
@@ -35,6 +34,7 @@ Requires-Dist: onnx==1.16.0; extra == "testing"
 Requires-Dist: onnx2torch; extra == "testing"
 Requires-Dist: opencv-python; extra == "testing"
 Requires-Dist: tabulate; extra == "testing"
+Requires-Dist: tqdm; extra == "testing"
 Requires-Dist: safetensors; extra == "testing"
 Requires-Dist: transformers; extra == "testing"
 Requires-Dist: sentencepiece; extra == "testing"
@@ -43,6 +43,7 @@ Requires-Dist: librosa; extra == "testing"
 Requires-Dist: networkx; extra == "testing"
 Requires-Dist: hypothesis; extra == "testing"
 Requires-Dist: nibabel; extra == "testing"
+Requires-Dist: bottle; extra == "testing"
 Provides-Extra: docs
 Requires-Dist: mkdocs-material; extra == "docs"
 Requires-Dist: mkdocstrings[python]; extra == "docs"
@@ -64,7 +65,7 @@ tinygrad: For something between [PyTorch](https://github.com/pytorch/pytorch) an
 
 <h3>
 
-[Homepage](https://github.com/tinygrad/tinygrad) | [Documentation](
+[Homepage](https://github.com/tinygrad/tinygrad) | [Documentation](https://docs.tinygrad.org/) | [Discord](https://discord.gg/ZjZadyC7PK)
 
 </h3>
 
@@ -139,7 +140,8 @@ tinygrad already supports numerous accelerators, including:
 - [x] [LLVM](tinygrad/runtime/ops_llvm.py)
 - [x] [METAL](tinygrad/runtime/ops_metal.py)
 - [x] [CUDA](tinygrad/runtime/ops_cuda.py)
-- [x] [
+- [x] [AMD](tinygrad/runtime/ops_amd.py)
+- [x] [NV](tinygrad/runtime/ops_nv.py)
 
 And it is easy to add more! Your accelerator of choice only needs to support a total of ~25 low level ops.
 
@@ -163,7 +165,7 @@ python3 -m pip install git+https://github.com/tinygrad/tinygrad.git
 
 ## Documentation
 
-Documentation along with a quick start guide can be found
+Documentation along with a quick start guide can be found on the [docs website](https://docs.tinygrad.org/) built from the [docs/](/docs) directory.
 
 ### Quick example comparing to PyTorch
 
@@ -209,7 +211,7 @@ Now, what we want:
 - Bug fixes (with a regression test) are great! This library isn't 1.0 yet, so if you stumble upon a bug, fix it, write a test, and submit a PR, this is valuable work.
 - Solving bounties! tinygrad [offers cash bounties](https://docs.google.com/spreadsheets/d/1WKHbT-7KOgjEawq5h5Ic1qUWzpfAzuD_J06N1JwOCGs/edit?usp=sharing) for certain improvements to the library. All new code should be high quality and well tested.
 - Features. However, if you are adding a feature, consider the line tradeoff. If it's 3 lines, there's less of a bar of usefulness it has to meet over something that's 30 or 300 lines. All features must have regression tests. In general with no other constraints, your feature's API should match torch or numpy.
-- Refactors that are clear wins. In general, if your refactor isn't a clear win it will be closed. But some refactors are amazing! Think about readability in a deep core sense. A whitespace change or moving a few functions around is useless, but if you realize that two 100 line functions can actually use the same 110 line function with arguments while also improving readability, this is a big win.
+- Refactors that are clear wins. In general, if your refactor isn't a clear win it will be closed. But some refactors are amazing! Think about readability in a deep core sense. A whitespace change or moving a few functions around is useless, but if you realize that two 100 line functions can actually use the same 110 line function with arguments while also improving readability, this is a big win. Refactors should pass [process replay](#process-replay-tests).
 - Tests/fuzzers. If you can add tests that are non brittle, they are welcome. We have some fuzzers in here too, and there's a plethora of bugs that can be found with them and by improving them. Finding bugs, even writing broken tests (that should pass) with `@unittest.expectedFailure` is great. This is how we make progress.
 - Dead code removal from core `tinygrad/` folder. We don't care about the code in extra, but removing dead code from the core library is great. Less for new people to read and be confused by.
 
@@ -225,3 +227,9 @@ python3 -m pip install -e '.[testing]' # install extra deps for testing
 python3 test/test_ops.py # just the ops tests
 python3 -m pytest test/ # whole test suite
 ```
+
+#### Process replay tests
+
+[Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/process_replay.py) detects changes in the generated kernels of CI tests by comparing them against tinygrad master. If your PR is a refactor or speedup without any expected behavior change, it should include a green process replay pass to get merged.
+
+You can enable process replay by adding [run_process_replay] to your PR title. [example](https://github.com/tinygrad/tinygrad/pull/4995). Note that you should keep your branch up-to-date with master.
--- tinygrad-0.9.0/README.md
+++ tinygrad-0.9.1/README.md
@@ -9,7 +9,7 @@ tinygrad: For something between [PyTorch](https://github.com/pytorch/pytorch) an
 
 <h3>
 
-[Homepage](https://github.com/tinygrad/tinygrad) | [Documentation](
+[Homepage](https://github.com/tinygrad/tinygrad) | [Documentation](https://docs.tinygrad.org/) | [Discord](https://discord.gg/ZjZadyC7PK)
 
 </h3>
 
@@ -84,7 +84,8 @@ tinygrad already supports numerous accelerators, including:
 - [x] [LLVM](tinygrad/runtime/ops_llvm.py)
 - [x] [METAL](tinygrad/runtime/ops_metal.py)
 - [x] [CUDA](tinygrad/runtime/ops_cuda.py)
-- [x] [
+- [x] [AMD](tinygrad/runtime/ops_amd.py)
+- [x] [NV](tinygrad/runtime/ops_nv.py)
 
 And it is easy to add more! Your accelerator of choice only needs to support a total of ~25 low level ops.
 
@@ -108,7 +109,7 @@ python3 -m pip install git+https://github.com/tinygrad/tinygrad.git
 
 ## Documentation
 
-Documentation along with a quick start guide can be found
+Documentation along with a quick start guide can be found on the [docs website](https://docs.tinygrad.org/) built from the [docs/](/docs) directory.
 
 ### Quick example comparing to PyTorch
 
@@ -154,7 +155,7 @@ Now, what we want:
 - Bug fixes (with a regression test) are great! This library isn't 1.0 yet, so if you stumble upon a bug, fix it, write a test, and submit a PR, this is valuable work.
 - Solving bounties! tinygrad [offers cash bounties](https://docs.google.com/spreadsheets/d/1WKHbT-7KOgjEawq5h5Ic1qUWzpfAzuD_J06N1JwOCGs/edit?usp=sharing) for certain improvements to the library. All new code should be high quality and well tested.
 - Features. However, if you are adding a feature, consider the line tradeoff. If it's 3 lines, there's less of a bar of usefulness it has to meet over something that's 30 or 300 lines. All features must have regression tests. In general with no other constraints, your feature's API should match torch or numpy.
-- Refactors that are clear wins. In general, if your refactor isn't a clear win it will be closed. But some refactors are amazing! Think about readability in a deep core sense. A whitespace change or moving a few functions around is useless, but if you realize that two 100 line functions can actually use the same 110 line function with arguments while also improving readability, this is a big win.
+- Refactors that are clear wins. In general, if your refactor isn't a clear win it will be closed. But some refactors are amazing! Think about readability in a deep core sense. A whitespace change or moving a few functions around is useless, but if you realize that two 100 line functions can actually use the same 110 line function with arguments while also improving readability, this is a big win. Refactors should pass [process replay](#process-replay-tests).
 - Tests/fuzzers. If you can add tests that are non brittle, they are welcome. We have some fuzzers in here too, and there's a plethora of bugs that can be found with them and by improving them. Finding bugs, even writing broken tests (that should pass) with `@unittest.expectedFailure` is great. This is how we make progress.
 - Dead code removal from core `tinygrad/` folder. We don't care about the code in extra, but removing dead code from the core library is great. Less for new people to read and be confused by.
 
@@ -170,3 +171,9 @@ python3 -m pip install -e '.[testing]' # install extra deps for testing
 python3 test/test_ops.py # just the ops tests
 python3 -m pytest test/ # whole test suite
 ```
+
+#### Process replay tests
+
+[Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/process_replay.py) detects changes in the generated kernels of CI tests by comparing them against tinygrad master. If your PR is a refactor or speedup without any expected behavior change, it should include a green process replay pass to get merged.
+
+You can enable process replay by adding [run_process_replay] to your PR title. [example](https://github.com/tinygrad/tinygrad/pull/4995). Note that you should keep your branch up-to-date with master.
--- tinygrad-0.9.0/setup.py
+++ tinygrad-0.9.1/setup.py
@@ -8,7 +8,7 @@ with open(directory / 'README.md', encoding='utf-8') as f:
   long_description = f.read()
 
 setup(name='tinygrad',
-      version='0.9.0',
+      version='0.9.1',
       description='You like pytorch? You like micrograd? You love tinygrad! <3',
       author='George Hotz',
       license='MIT',
@@ -20,7 +20,7 @@ setup(name='tinygrad',
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: MIT License"
       ],
-      install_requires=["numpy",
+      install_requires=["numpy",
                         "pyobjc-framework-Metal; platform_system=='Darwin'",
                         "pyobjc-framework-libdispatch; platform_system=='Darwin'"],
       python_requires='>=3.8',
@@ -46,6 +46,7 @@ setup(name='tinygrad',
         "onnx2torch",
         "opencv-python",
         "tabulate",
+        "tqdm",
         "safetensors",
         "transformers",
         "sentencepiece",
@@ -54,6 +55,7 @@ setup(name='tinygrad',
         "networkx",
         "hypothesis",
         "nibabel",
+        "bottle",
       ],
       'docs': [
         "mkdocs-material",
--- tinygrad-0.9.0/test/test_arange.py
+++ tinygrad-0.9.1/test/test_arange.py
@@ -1,10 +1,12 @@
 import unittest
 from tinygrad import Tensor, GlobalCounters
+from tinygrad.helpers import Context
 
 class TestArange(unittest.TestCase):
   def _get_flops(self, N):
     GlobalCounters.reset()
-
+    with Context(NOOPT=1):
+      Tensor.arange(N).realize()
     return GlobalCounters.global_ops
 
   def test_complexity(self):
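The test_arange change above wraps the measured work in a `Context(NOOPT=1)` block so the op count reflects the unoptimized kernels. A minimal sketch of that pattern outside the test harness (the size 16 is an arbitrary example value, not from the diff):

```python
# Minimal sketch of the Context pattern used in the new test_arange.
from tinygrad import Tensor, GlobalCounters
from tinygrad.helpers import Context

GlobalCounters.reset()
with Context(NOOPT=1):            # temporarily disable kernel optimizations
  Tensor.arange(16).realize()     # realize inside the override so its kernels are counted
print(GlobalCounters.global_ops)  # ops executed for the unoptimized arange
```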
--- tinygrad-0.9.0/test/test_const_folding.py
+++ tinygrad-0.9.1/test/test_const_folding.py
@@ -28,6 +28,11 @@ class TestUnaryOpsConstFolding(unittest.TestCase):
     _check_ast_count(0, Tensor([1, 2, 3]).neg().mul(-1))
     _check_ast_count(0, Tensor([1, 2, 3]).neg().neg())
 
+  def test_neg_realized_no_fold(self):
+    x = Tensor.randn(32, 32)
+    x = x.clip(0, 1).realize()
+    _check_ast_count(1, x.neg())
+
 class TestBinaryOpsConstFolding(unittest.TestCase):
   def test_add_literal_zero(self):
     _check_ast_count(0, Tensor([1.0, 2, 3, 4]) + 0)
@@ -250,4 +255,4 @@ class TestTautologicalCompare(unittest.TestCase):
     np.testing.assert_equal((a != a).numpy(), [True, False, False])
 
 if __name__ == '__main__':
-  unittest.main()
+  unittest.main()
--- tinygrad-0.9.0/test/test_custom_function.py
+++ tinygrad-0.9.1/test/test_custom_function.py
@@ -31,7 +31,7 @@ def atan2_cpu(ret:Buffer, a:Buffer, b:Buffer): ret.copyin(np.require(np.arctan2(
 # NOTE: The derivative of atan2 doesn't need a custom op! https://www.liquisearch.com/atan2/derivative
 # In general, it is also optional to write a backward function, just your backward pass won't work without it
 
-from tinygrad.ops import LoadOps, BinaryOps
+from tinygrad.ops import LoadOps, BinaryOps, UnaryOps
 from tinygrad.lazy import LazyBuffer
 from tinygrad.tensor import Function
 
@@ -42,9 +42,10 @@ class ATan2(Function):
     return create_lazybuffer(a.device, ShapeTracker.from_shape(a.shape), max(a.dtype, b.dtype), LoadOps.CUSTOM,
                              arg={"GPU": atan2_gpu, "CPU": atan2_cpu}[a.device], srcs=(a.contiguous(), b.contiguous()))
   def backward(self, grad_output:LazyBuffer) -> Tuple[Optional[LazyBuffer], Optional[LazyBuffer]]:
-
-    return grad_output.e(BinaryOps.MUL, self.b.e(BinaryOps.
-           grad_output.e(BinaryOps.MUL, self.a.const(0).e(BinaryOps.
+    recip = (self.a.e(BinaryOps.MUL, self.a)).e(BinaryOps.ADD, self.b.e(BinaryOps.MUL, self.b)).e(UnaryOps.RECIP)
+    return grad_output.e(BinaryOps.MUL, self.b.e(BinaryOps.MUL, recip)) if self.needs_input_grad[0] else None, \
+           grad_output.e(BinaryOps.MUL, self.a.const(0).e(BinaryOps.ADD, self.a.e(UnaryOps.NEG)).e(BinaryOps.MUL, recip)) \
+           if self.needs_input_grad[1] else None
 
 # *** third, we use our lovely new mlop in some tests ***
 
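For reference, the rewritten `ATan2.backward` factors out the term shared by both partial derivatives of atan2, with `recip` holding the common reciprocal:

$$\frac{\partial}{\partial a}\operatorname{atan2}(a,b) = \frac{b}{a^2+b^2}, \qquad \frac{\partial}{\partial b}\operatorname{atan2}(a,b) = \frac{-a}{a^2+b^2}$$

Each returned gradient is the incoming gradient multiplied by one of these terms, which is what the `self.b.e(BinaryOps.MUL, recip)` and negated-`self.a` branches compute.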
--- tinygrad-0.9.0/test/test_device_speed.py
+++ tinygrad-0.9.1/test/test_device_speed.py
@@ -7,7 +7,7 @@ class TestDeviceSpeed(unittest.TestCase):
   @classmethod
   def setUpClass(cls):
     cls.dev = Device[Device.DEFAULT]
-    cls.empty = Device[Device.DEFAULT].renderer.render("test", UOpGraph())
+    cls.empty = Device[Device.DEFAULT].renderer.render("test", UOpGraph([]))
 
   def test_empty_compile(self):
     with Timing("compiler "):
--- tinygrad-0.9.0/test/test_dtype.py
+++ tinygrad-0.9.1/test/test_dtype.py
@@ -1,14 +1,15 @@
-import unittest, operator, subprocess
+import unittest, operator, subprocess, math
 import numpy as np
 import torch
 from typing import Any, List
 from tinygrad.helpers import getenv, DEBUG, CI
 from tinygrad.dtype import DType, DTYPES_DICT, ImageDType, PtrDType, least_upper_float, least_upper_dtype
 from tinygrad import Device, Tensor, dtypes
+from tinygrad.tensor import _to_np_dtype
 from hypothesis import given, settings, strategies as strat
 from test.helpers import is_dtype_supported, rand_for_dtype
 
-settings.register_profile("my_profile", max_examples=200, deadline=None)
+settings.register_profile("my_profile", max_examples=200, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))
 settings.load_profile("my_profile")
 
 core_dtypes = list(DTYPES_DICT.values())
@@ -51,10 +52,10 @@ def _test_cast(a:Tensor, target_dtype:DType):
     # TODO: cast between double and half are broken https://github.com/tinygrad/tinygrad/issues/4084
     return
 
-  _test_op(lambda: a.cast(target_dtype), target_dtype, list(a.numpy().astype(target_dtype
+  _test_op(lambda: a.cast(target_dtype), target_dtype, list(a.numpy().astype(_to_np_dtype(target_dtype))))
 def _test_bitcast(a:Tensor, target_dtype:DType, target=None):
   if target_dtype == dtypes.bfloat16: raise unittest.SkipTest("no test for bf16 bitcast yet")
-  _test_op(lambda: a.bitcast(target_dtype), target_dtype, target or a.numpy().view(target_dtype
+  _test_op(lambda: a.bitcast(target_dtype), target_dtype, target or a.numpy().view(_to_np_dtype(target_dtype)).tolist())
 
 class TestDType(unittest.TestCase):
   DTYPE: Any = None
@@ -66,7 +67,8 @@ class TestDType(unittest.TestCase):
   def setUp(self):
     if self.DTYPE is None: raise unittest.SkipTest("base class")
 
-  def test_to_np(self):
+  def test_to_np(self):
+    _test_to_np(Tensor(self.DATA, dtype=self.DTYPE), _to_np_dtype(self.DTYPE), np.array(self.DATA, dtype=_to_np_dtype(self.DTYPE)))
 
   def test_casts_to(self): list(map(
     lambda dtype: _test_cast(Tensor(self.DATA, dtype=dtype), self.DTYPE),
@@ -104,13 +106,13 @@
   def test_dtypes_fields(self):
     fields = dtypes.fields()
     self.assertTrue(all(isinstance(value, DType) for value in fields.values()))
-    self.assertTrue(all(issubclass(value
+    self.assertTrue(all(issubclass(_to_np_dtype(value), np.generic) for value in fields.values() if _to_np_dtype(value) is not None))
 
   def test_resulting_and_init_dtypes_match(self):
     dtypes = list(map(np.dtype, ["bool", "uint8", "int8", "int16", "int32", "int64", "float32", "float64"]))
     data = [1., 2., 0., 0.5, -1.5, 5.25]
     for dt in dtypes:
-      arr = np.asarray(data
+      arr = np.asarray(data).astype(dt)
       tin = Tensor(arr).numpy()
      tor = torch.as_tensor(arr).detach().numpy()
       assert dt == tin.dtype == tor.dtype, f"dtype mismatch: expected={dt} | tinygrad={tin.dtype} | torch={tor.dtype}"
@@ -329,6 +331,22 @@ class TestHelpers(unittest.TestCase):
   def test_scalar(self, dtype, amt):
     assert dtype.vec(amt).scalar() == dtype
 
+  def test_from_py(self):
+    assert dtypes.from_py(True) == dtypes.bool
+    assert dtypes.from_py(2) == dtypes.default_int
+    assert dtypes.from_py(3.0) == dtypes.default_float
+    assert dtypes.from_py([]) == dtypes.default_float
+    assert dtypes.from_py(()) == dtypes.default_float
+    assert dtypes.from_py([True]) == dtypes.bool
+    assert dtypes.from_py([True, 2]) == dtypes.default_int
+    assert dtypes.from_py([True, 3.0]) == dtypes.default_float
+    assert dtypes.from_py([2, 3.0]) == dtypes.default_float
+    assert dtypes.from_py([True, 2, 3.0]) == dtypes.default_float
+    with self.assertRaises(RuntimeError): dtypes.from_py(None)
+    with self.assertRaises(RuntimeError): dtypes.from_py([None])
+    with self.assertRaises(RuntimeError): dtypes.from_py({})
+    with self.assertRaises(RuntimeError): dtypes.from_py(set())
+
 class TestTypeSpec(unittest.TestCase):
   def setUp(self):
     self.old_default_int, self.old_default_float = dtypes.default_int, dtypes.default_float
@@ -446,6 +464,18 @@ class TestTypeSpec(unittest.TestCase):
     assert X_data.gather(0, indices).dtype == X_data.dtype
     assert X_data.gather(1, indices).dtype == X_data.dtype
 
+  @given(strat.sampled_from(dtype_floats), strat.sampled_from(dtype_floats))
+  def test_attention_returns_same_dtype(self, data_dtype, default_float):
+    dtypes.default_float = default_float
+    query = Tensor.rand(32, 8, 128, 64, dtype=data_dtype)
+    key = Tensor.rand(32, 8, 128, 64, dtype=data_dtype)
+    value = Tensor.rand(32, 8, 128, 64, dtype=data_dtype)
+    mask = (Tensor.rand(32, 8, 128, 128) < 0.5)
+    assert query.scaled_dot_product_attention(key, value, is_causal=True).dtype == data_dtype
+    assert query.scaled_dot_product_attention(key, value, is_causal=True, dropout_p=0.3).dtype == data_dtype
+    assert query.scaled_dot_product_attention(key, value, is_causal=False).dtype == data_dtype
+    assert query.scaled_dot_product_attention(key, value, attn_mask=mask).dtype == data_dtype
+
 class TestTypePromotion(unittest.TestCase):
   @given(strat.sampled_from(core_dtypes))
   def test_self_promo_to_self(self, dtype):
@@ -526,6 +556,16 @@ class TestAutoCastType(unittest.TestCase):
     assert (Tensor([0, 1], dtype=dtypes.float32)).sum().dtype == dtypes.float32
     assert (Tensor([0, 1], dtype=dtypes.float64)).sum().dtype == dtypes.float64
 
+  @unittest.skipUnless(is_dtype_supported(dtypes.float16), "need float16")
+  def test_sum_acc_dtype(self):
+    t = Tensor([40000, 40000], dtype=dtypes.float16)
+    # default float16 sum returns in float16, overflowed in this case
+    assert t.sum().dtype == dtypes.float16
+    assert math.isinf(t.sum().numpy().item())
+    # specifiying acc_dtype and it's not downcasted
+    assert t.sum(acc_dtype=dtypes.float32).dtype == dtypes.float32
+    np.testing.assert_allclose(t.sum(acc_dtype=dtypes.float32).numpy(), 80000)
+
   def test_mean(self):
     assert (Tensor([0, 1], dtype=dtypes.bool)).mean().dtype == dtypes.float32
     assert (Tensor([0, 1], dtype=dtypes.int8)).mean().dtype == dtypes.float32
@@ -556,9 +596,13 @@ class TestAutoCastType(unittest.TestCase):
     assert (Tensor([0, 1], dtype=dtypes.float32)).cumsum(0).dtype == dtypes.float32
     assert (Tensor([0, 1], dtype=dtypes.float64)).cumsum(0).dtype == dtypes.float64
 
-  @given(strat.sampled_from(core_dtypes), strat.sampled_from(core_dtypes))
-  def test_matmul(self, dt1, dt2):
-
+  @given(strat.sampled_from(core_dtypes), strat.sampled_from(core_dtypes), strat.sampled_from(core_dtypes))
+  def test_matmul(self, dt1, dt2, acc_dt):
+    t1 = Tensor([0, 1], dtype=dt1)
+    t2 = Tensor([0, 1], dtype=dt2)
+    assert (t1 @ t2).dtype == least_upper_dtype(dt1, dt2)
+    # if acc_dtype is specified, return in acc_dtype
+    assert (t1.matmul(t2, acc_dtype=acc_dt).dtype == acc_dt)
 
   @staticmethod
   def check_where_alternate_input_other(input_, other, data_type):
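The new `test_sum_acc_dtype` and the extended `test_matmul` above exercise the `acc_dtype` argument: by default a float16 sum accumulates and returns in float16, which overflows for these values, while passing `acc_dtype` widens both the accumulator and the result. A small sketch mirroring that behavior (assuming the default device supports float16, as the test's `skipUnless` guard does):

```python
# Sketch mirroring the new acc_dtype tests; assumes the default device supports float16.
from tinygrad import Tensor, dtypes

t = Tensor([40000, 40000], dtype=dtypes.float16)
print(t.sum().numpy())                          # inf: accumulated in float16
print(t.sum(acc_dtype=dtypes.float32).numpy())  # 80000.0, returned as float32

a, b = Tensor([0, 1], dtype=dtypes.int8), Tensor([0, 1], dtype=dtypes.int8)
assert a.matmul(b, acc_dtype=dtypes.int32).dtype == dtypes.int32  # result keeps acc_dtype
```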
--- tinygrad-0.9.0/test/test_dtype_alu.py
+++ tinygrad-0.9.1/test/test_dtype_alu.py
@@ -9,9 +9,10 @@ from tinygrad.helpers import CI, getenv
 from tinygrad.engine.schedule import create_schedule
 from tinygrad.engine.realize import run_schedule
 from tinygrad.ops import UnaryOps
+from tinygrad.tensor import _to_np_dtype
 from test.helpers import is_dtype_supported
 
-settings.register_profile("my_profile", max_examples=200, deadline=None)
+settings.register_profile("my_profile", max_examples=200, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))
 settings.load_profile("my_profile")
 print(settings.default)
 
@@ -59,7 +60,7 @@ class ht:
 def universal_test(a, b, dtype, op):
   if not isinstance(op, tuple): op = (op, op)
   tensor_value = (op[0](Tensor([a], dtype=dtype), Tensor([b], dtype=dtype))).numpy()
-  numpy_value = op[1](np.array([a]).astype(dtype
+  numpy_value = op[1](np.array([a]).astype(_to_np_dtype(dtype)), np.array([b]).astype(_to_np_dtype(dtype)))
   if dtype in dtypes_float: np.testing.assert_allclose(tensor_value, numpy_value, atol=1e-10)
   else: np.testing.assert_equal(tensor_value, numpy_value)
 
@@ -70,7 +71,7 @@ def universal_test_unary(a, dtype, op):
   ast = sched[-1].ast[0]
   run_schedule(sched)
   tensor_value = out.numpy()
-  numpy_value = op[1](np.array([a]).astype(dtype
+  numpy_value = op[1](np.array([a]).astype(_to_np_dtype(dtype)))
   if dtype in dtypes_float:
     np.testing.assert_allclose(tensor_value, numpy_value, atol=1e-3, rtol=1e-2)
   else: np.testing.assert_equal(tensor_value, numpy_value)
@@ -80,16 +81,16 @@ def universal_test_unary(a, dtype, op):
 
 def universal_test_cast(a, in_dtype, dtype):
   tensor_value = Tensor([a], dtype=in_dtype).cast(dtype)
-  numpy_value = np.array([a]).astype(dtype
+  numpy_value = np.array([a]).astype(_to_np_dtype(dtype))
   np.testing.assert_equal(tensor_value.numpy(), numpy_value)
 
 def universal_test_midcast(a, b, c, op1, op2, d1:DType, d2:DType):
   if not isinstance(op1, tuple): op1 = (op1, op1)
   if not isinstance(op2, tuple): op2 = (op2, op2)
   at, bt, ct = Tensor([a], dtype=d1), Tensor([b], dtype=d1), Tensor([c], dtype=d2)
-  an, bn, cn = np.array([a]).astype(d1
+  an, bn, cn = np.array([a]).astype(_to_np_dtype(d1)), np.array([b]).astype(_to_np_dtype(d1)), np.array([c]).astype(_to_np_dtype(d2))
   tensor_value = op2[0](op1[0](at, bt).cast(d2), ct).numpy()
-  numpy_value = op2[1](op1[1](an, bn).astype(d2
+  numpy_value = op2[1](op1[1](an, bn).astype(_to_np_dtype(d2)), cn)
   np.testing.assert_allclose(tensor_value, numpy_value, rtol=1e-6 if getenv("PTX") else 1e-7)
 
 class TestDTypeALU(unittest.TestCase):
@@ -145,10 +146,11 @@ class TestDTypeALU(unittest.TestCase):
   def test_int32_midcast_float(self, a, b, c, op1, op2): universal_test_midcast(a, b, c, op1, op2, dtypes.int32, dtypes.float32)
 
   # Metal and CUDACPU and HIP behave differently than numpy in CI for overflows
-  skip_overflow = CI and (Device.DEFAULT in {"
+  skip_overflow = CI and (Device.DEFAULT in {"AMD", "NV"} or getenv("CUDACPU"))
   @given(strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
          strat.floats(width=32, min_value=0, max_value=10.0) if skip_overflow else ht.float32,
          ht.int32, strat.sampled_from(binary_operations), strat.sampled_from(integer_binary_operations))
+  @unittest.skipIf(Device.DEFAULT == "PYTHON", "TODO: fix cast inf to int32 in PYTHON")
   def test_float_midcast_int32(self, a, b, c, op1, op2): universal_test_midcast(a, b, c, op1, op2, dtypes.float32, dtypes.int32)
 
   @unittest.skip("broken. TODO: fix it")
--- tinygrad-0.9.0/test/test_fuzz_shape_ops.py
+++ tinygrad-0.9.1/test/test_fuzz_shape_ops.py
@@ -1,3 +1,4 @@
+from __future__ import annotations
 import unittest
 from math import prod
 
@@ -7,11 +8,11 @@ from hypothesis.extra import numpy as stn
 import numpy as np
 import torch
 from tinygrad import Tensor, Device
-from tinygrad.helpers import CI
+from tinygrad.helpers import CI, getenv
 
 
 settings.register_profile(__file__, settings.default,
-                          max_examples=100 if CI else 250, deadline=None)
+                          max_examples=100 if CI else 250, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))
 
 
 # torch wraparound for large numbers