tinygrad 0.9.2.tar.gz → 0.10.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tinygrad-0.9.2/tinygrad.egg-info → tinygrad-0.10.0}/PKG-INFO +10 -7
- {tinygrad-0.9.2 → tinygrad-0.10.0}/README.md +4 -1
- {tinygrad-0.9.2 → tinygrad-0.10.0}/setup.py +10 -8
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_arange.py +21 -9
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_assign.py +17 -4
- tinygrad-0.10.0/test/test_compile_failures.py +18 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_const_folding.py +27 -12
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_conv_shapetracker.py +9 -14
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_copy_speed.py +1 -1
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_device_speed.py +1 -2
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_dtype.py +133 -33
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_dtype_alu.py +19 -44
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_fusion_op.py +28 -9
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_fuzz_shape_ops.py +2 -2
- tinygrad-0.10.0/test/test_gc.py +67 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_graph.py +1 -2
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_hcq.py +25 -13
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_image_dtype.py +31 -3
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_jit.py +21 -2
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_lazybuffer.py +34 -13
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_linearizer.py +734 -637
- tinygrad-0.10.0/test/test_linearizer_dumb.py +223 -0
- tinygrad-0.10.0/test/test_linearizer_failures.py +1435 -0
- tinygrad-0.10.0/test/test_linearizer_overflows.py +196 -0
- tinygrad-0.10.0/test/test_metal.py +77 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_multitensor.py +110 -21
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_nn.py +219 -33
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_ocl.py +11 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_ops.py +433 -56
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_optim.py +1 -1
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_pickle.py +27 -3
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_profiler.py +4 -3
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_randomness.py +146 -28
- tinygrad-0.10.0/test/test_rearrange_einops.py +321 -0
- tinygrad-0.10.0/test/test_renderer_failures.py +68 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_sample.py +1 -2
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_schedule.py +462 -192
- tinygrad-0.10.0/test/test_search.py +158 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_setitem.py +23 -8
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_specific_conv.py +1 -1
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_speed_v_torch.py +4 -2
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_subbuffer.py +1 -1
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_symbolic_jit.py +1 -3
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_symbolic_ops.py +2 -2
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_symbolic_shapetracker.py +37 -40
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_tensor.py +84 -38
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_tensor_variable.py +36 -20
- tinygrad-0.10.0/test/test_tiny.py +84 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_transcendental.py +59 -9
- tinygrad-0.10.0/test/test_uop_graph.py +716 -0
- tinygrad-0.10.0/test/test_uops.py +454 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_uops_stats.py +45 -24
- tinygrad-0.10.0/test/test_viz.py +93 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_winograd.py +3 -3
- tinygrad-0.10.0/tinygrad/__init__.py +11 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/codegen/kernel.py +214 -214
- tinygrad-0.10.0/tinygrad/codegen/linearize.py +95 -0
- tinygrad-0.10.0/tinygrad/codegen/lowerer.py +143 -0
- tinygrad-0.10.0/tinygrad/codegen/transcendental.py +257 -0
- tinygrad-0.10.0/tinygrad/codegen/uopgraph.py +506 -0
- tinygrad-0.10.0/tinygrad/device.py +221 -0
- tinygrad-0.10.0/tinygrad/dtype.py +188 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/engine/jit.py +57 -38
- {tinygrad-0.9.2/tinygrad → tinygrad-0.10.0/tinygrad/engine}/lazy.py +67 -58
- tinygrad-0.10.0/tinygrad/engine/memory.py +51 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/engine/realize.py +23 -74
- tinygrad-0.10.0/tinygrad/engine/schedule.py +419 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/engine/search.py +33 -29
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/function.py +53 -61
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/helpers.py +88 -108
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/multi.py +53 -54
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/nn/__init__.py +85 -53
- tinygrad-0.10.0/tinygrad/nn/datasets.py +15 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/nn/optim.py +1 -1
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/nn/state.py +89 -4
- tinygrad-0.10.0/tinygrad/ops.py +1152 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/renderer/__init__.py +21 -19
- tinygrad-0.10.0/tinygrad/renderer/cstyle.py +462 -0
- tinygrad-0.10.0/tinygrad/renderer/llvmir.py +142 -0
- tinygrad-0.10.0/tinygrad/renderer/ptx.py +225 -0
- tinygrad-0.10.0/tinygrad/runtime/autogen/adreno.py +17904 -0
- tinygrad-0.10.0/tinygrad/runtime/autogen/amd_gpu.py +48384 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/io_uring.py +97 -63
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/kfd.py +60 -79
- tinygrad-0.10.0/tinygrad/runtime/autogen/kgsl.py +1386 -0
- tinygrad-0.10.0/tinygrad/runtime/autogen/libc.py +5462 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/nv_gpu.py +1976 -1957
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/opencl.py +11 -11
- tinygrad-0.10.0/tinygrad/runtime/autogen/qcom_dsp.py +1739 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/graph/clang.py +1 -1
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/graph/cuda.py +3 -4
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/graph/hcq.py +5 -5
- tinygrad-0.10.0/tinygrad/runtime/graph/metal.py +103 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_amd.py +86 -57
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_clang.py +11 -4
- tinygrad-0.10.0/tinygrad/runtime/ops_cloud.py +220 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_cuda.py +8 -7
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_disk.py +25 -22
- tinygrad-0.10.0/tinygrad/runtime/ops_dsp.py +181 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_gpu.py +26 -15
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_hip.py +3 -5
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_llvm.py +15 -10
- tinygrad-0.10.0/tinygrad/runtime/ops_metal.py +188 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_nv.py +135 -96
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_python.py +65 -62
- tinygrad-0.10.0/tinygrad/runtime/ops_qcom.py +405 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/support/compiler_cuda.py +6 -7
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/support/compiler_hip.py +3 -5
- tinygrad-0.9.2/tinygrad/device.py → tinygrad-0.10.0/tinygrad/runtime/support/hcq.py +66 -206
- tinygrad-0.10.0/tinygrad/shape/__init__.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/shape/shapetracker.py +38 -39
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/shape/view.py +79 -52
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/tensor.py +778 -336
- {tinygrad-0.9.2 → tinygrad-0.10.0/tinygrad.egg-info}/PKG-INFO +10 -7
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad.egg-info/SOURCES.txt +17 -9
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad.egg-info/requires.txt +4 -6
- tinygrad-0.9.2/test/test_custom_function.py +0 -106
- tinygrad-0.9.2/test/test_gc.py +0 -37
- tinygrad-0.9.2/test/test_lazyop.py +0 -34
- tinygrad-0.9.2/test/test_linearizer_dumb.py +0 -104
- tinygrad-0.9.2/test/test_linearizer_failures.py +0 -467
- tinygrad-0.9.2/test/test_linearizer_overflows.py +0 -89
- tinygrad-0.9.2/test/test_pattern_matcher.py +0 -186
- tinygrad-0.9.2/test/test_renderer_failures.py +0 -43
- tinygrad-0.9.2/test/test_search.py +0 -112
- tinygrad-0.9.2/test/test_uop_graph.py +0 -662
- tinygrad-0.9.2/test/test_uops.py +0 -379
- tinygrad-0.9.2/test/test_verify_lazyop.py +0 -76
- tinygrad-0.9.2/tinygrad/__init__.py +0 -6
- tinygrad-0.9.2/tinygrad/codegen/lowerer.py +0 -215
- tinygrad-0.9.2/tinygrad/codegen/transcendental.py +0 -310
- tinygrad-0.9.2/tinygrad/codegen/uopgraph.py +0 -622
- tinygrad-0.9.2/tinygrad/codegen/uops.py +0 -293
- tinygrad-0.9.2/tinygrad/dtype.py +0 -127
- tinygrad-0.9.2/tinygrad/engine/graph.py +0 -87
- tinygrad-0.9.2/tinygrad/engine/schedule.py +0 -413
- tinygrad-0.9.2/tinygrad/nn/datasets.py +0 -8
- tinygrad-0.9.2/tinygrad/ops.py +0 -170
- tinygrad-0.9.2/tinygrad/renderer/assembly.py +0 -267
- tinygrad-0.9.2/tinygrad/renderer/cstyle.py +0 -416
- tinygrad-0.9.2/tinygrad/renderer/llvmir.py +0 -151
- tinygrad-0.9.2/tinygrad/runtime/autogen/amd_gpu.py +0 -32858
- tinygrad-0.9.2/tinygrad/runtime/autogen/libc.py +0 -4260
- tinygrad-0.9.2/tinygrad/runtime/graph/metal.py +0 -78
- tinygrad-0.9.2/tinygrad/runtime/ops_metal.py +0 -116
- tinygrad-0.9.2/tinygrad/shape/symbolic.py +0 -323
- {tinygrad-0.9.2 → tinygrad-0.10.0}/LICENSE +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/setup.cfg +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_conv.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_kernel_cache.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_masked_st.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_method_cache.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_net_speed.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_tensor_data.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_to_numpy.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_zero_copy.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/codegen/__init__.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/engine/__init__.py +0 -0
- /tinygrad-0.9.2/tinygrad/runtime/__init__.py → /tinygrad-0.10.0/tinygrad/py.typed +0 -0
- {tinygrad-0.9.2/tinygrad/runtime/graph → tinygrad-0.10.0/tinygrad/runtime}/__init__.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/comgr.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/cuda.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/hip.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/hsa.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/nvrtc.py +0 -0
- {tinygrad-0.9.2/tinygrad/runtime/support → tinygrad-0.10.0/tinygrad/runtime/graph}/__init__.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_npy.py +0 -0
- {tinygrad-0.9.2/tinygrad/shape → tinygrad-0.10.0/tinygrad/runtime/support}/__init__.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/support/elf.py +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad.egg-info/dependency_links.txt +0 -0
- {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad.egg-info/top_level.txt +0 -0

{tinygrad-0.9.2/tinygrad.egg-info → tinygrad-0.10.0}/PKG-INFO

@@ -1,17 +1,14 @@
 Metadata-Version: 2.1
 Name: tinygrad
-Version: 0.9.2
+Version: 0.10.0
 Summary: You like pytorch? You like micrograd? You love tinygrad! <3
 Author: George Hotz
 License: MIT
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
-Requires-Python: >=3.8
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: numpy
-Requires-Dist: pyobjc-framework-Metal; platform_system == "Darwin"
-Requires-Dist: pyobjc-framework-libdispatch; platform_system == "Darwin"
 Provides-Extra: llvm
 Requires-Dist: llvmlite; extra == "llvm"
 Provides-Extra: arm
@@ -20,12 +17,13 @@ Provides-Extra: triton
 Requires-Dist: triton-nightly>=2.1.0.dev20231014192330; extra == "triton"
 Provides-Extra: linting
 Requires-Dist: pylint; extra == "linting"
-Requires-Dist: mypy; extra == "linting"
+Requires-Dist: mypy==1.11.2; extra == "linting"
 Requires-Dist: typing-extensions; extra == "linting"
 Requires-Dist: pre-commit; extra == "linting"
 Requires-Dist: ruff; extra == "linting"
 Requires-Dist: types-tqdm; extra == "linting"
 Provides-Extra: testing
+Requires-Dist: numpy; extra == "testing"
 Requires-Dist: torch; extra == "testing"
 Requires-Dist: pillow; extra == "testing"
 Requires-Dist: pytest; extra == "testing"
@@ -45,6 +43,7 @@ Requires-Dist: networkx; extra == "testing"
 Requires-Dist: hypothesis; extra == "testing"
 Requires-Dist: nibabel; extra == "testing"
 Requires-Dist: bottle; extra == "testing"
+Requires-Dist: ggml-python; extra == "testing"
 Provides-Extra: docs
 Requires-Dist: mkdocs; extra == "docs"
 Requires-Dist: mkdocs-material; extra == "docs"
@@ -52,6 +51,7 @@ Requires-Dist: mkdocstrings[python]; extra == "docs"
 Requires-Dist: markdown-callouts; extra == "docs"
 Requires-Dist: markdown-exec[ansi]; extra == "docs"
 Requires-Dist: black; extra == "docs"
+Requires-Dist: numpy; extra == "docs"
 Provides-Extra: testing-tf
 Requires-Dist: tensorflow==2.15.1; extra == "testing-tf"
 Requires-Dist: tensorflow_addons; extra == "testing-tf"
@@ -145,9 +145,12 @@ tinygrad already supports numerous accelerators, including:
 - [x] [CUDA](tinygrad/runtime/ops_cuda.py)
 - [x] [AMD](tinygrad/runtime/ops_amd.py)
 - [x] [NV](tinygrad/runtime/ops_nv.py)
+- [x] [QCOM](tinygrad/runtime/ops_qcom.py)
 
 And it is easy to add more! Your accelerator of choice only needs to support a total of ~25 low level ops.
 
+To check default accelerator run: `python3 -c "from tinygrad import Device; print(Device.DEFAULT)"`
+
 ## Installation
 
 The current recommended way to install tinygrad is from source.
@@ -233,4 +236,4 @@ python3 -m pytest test/ # whole test suite
 
 #### Process replay tests
 
-[Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/
+[Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/README.md) compares your PR's generated kernels against master. If your PR is a refactor or speedup without any expected behavior change, It should include [pr] in the pull request title.

{tinygrad-0.9.2 → tinygrad-0.10.0}/README.md

@@ -87,9 +87,12 @@ tinygrad already supports numerous accelerators, including:
 - [x] [CUDA](tinygrad/runtime/ops_cuda.py)
 - [x] [AMD](tinygrad/runtime/ops_amd.py)
 - [x] [NV](tinygrad/runtime/ops_nv.py)
+- [x] [QCOM](tinygrad/runtime/ops_qcom.py)
 
 And it is easy to add more! Your accelerator of choice only needs to support a total of ~25 low level ops.
 
+To check default accelerator run: `python3 -c "from tinygrad import Device; print(Device.DEFAULT)"`
+
 ## Installation
 
 The current recommended way to install tinygrad is from source.
@@ -175,4 +178,4 @@ python3 -m pytest test/ # whole test suite
 
 #### Process replay tests
 
-[Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/
+[Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/README.md) compares your PR's generated kernels against master. If your PR is a refactor or speedup without any expected behavior change, It should include [pr] in the pull request title.

{tinygrad-0.9.2 → tinygrad-0.10.0}/setup.py

@@ -8,7 +8,7 @@ with open(directory / 'README.md', encoding='utf-8') as f:
   long_description = f.read()
 
 setup(name='tinygrad',
-      version='0.9.2',
+      version='0.10.0',
       description='You like pytorch? You like micrograd? You love tinygrad! <3',
       author='George Hotz',
       license='MIT',
@@ -16,28 +16,28 @@ setup(name='tinygrad',
       long_description_content_type='text/markdown',
       packages = ['tinygrad', 'tinygrad.runtime.autogen', 'tinygrad.codegen', 'tinygrad.nn', 'tinygrad.renderer', 'tinygrad.engine',
                   'tinygrad.runtime', 'tinygrad.runtime.support', 'tinygrad.runtime.graph', 'tinygrad.shape'],
+      package_data = {'tinygrad': ['py.typed']},
      classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License"
      ],
-      install_requires=["numpy",
-                        "pyobjc-framework-Metal; platform_system=='Darwin'",
-                        "pyobjc-framework-libdispatch; platform_system=='Darwin'"],
-      python_requires='>=3.8',
+      install_requires=[],
+      python_requires='>=3.10',
       extras_require={
        'llvm': ["llvmlite"],
        'arm': ["unicorn"],
        'triton': ["triton-nightly>=2.1.0.dev20231014192330"],
        'linting': [
            "pylint",
-           "mypy",
+           "mypy==1.11.2",
            "typing-extensions",
            "pre-commit",
            "ruff",
            "types-tqdm",
        ],
-       #'mlperf': ["mlperf-logging @ git+https://github.com/mlperf/logging.git@4.
+       #'mlperf': ["mlperf-logging @ git+https://github.com/mlperf/logging.git@4.1.0-rc3"],
        'testing': [
+           "numpy",
            "torch",
            "pillow",
            "pytest",
@@ -57,6 +57,7 @@ setup(name='tinygrad',
            "hypothesis",
            "nibabel",
            "bottle",
+           "ggml-python"
        ],
        'docs': [
            "mkdocs",
@@ -64,7 +65,8 @@ setup(name='tinygrad',
            "mkdocstrings[python]",
            "markdown-callouts",
            "markdown-exec[ansi]",
-           "black"
+           "black",
+           "numpy",
        ],
        'testing_tf': [
            "tensorflow==2.15.1",

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_arange.py

@@ -23,16 +23,21 @@ class TestArange(unittest.TestCase):
     np.testing.assert_equal(tt.numpy(), np.arange(N))
     return p.op_estimate
 
-  def test_complexity(self, opts=None):
+  def test_complexity(self, opts=None, limit=None):
     # add 1 to avoid divide by 0. arange is 0 flops now!
     f1 = self._get_flops(256, opts) + 1
     f2 = self._get_flops(2560, opts) + 1
     print(f"{f1=}, {f2=}")
     assert (f1 < 5000 and f2 < 5000) or (f2 / f1 < 15), f"bad complexity, flops {f2/f1:.1f}X while inputs 10X"
+    if limit is not None and not getenv("PTX"):
+      # PTX counts index ALU in flops
+      assert f1 <= limit, f"{f1=}, {limit=}"
 
-  def test_complexity_w_upcast(self): return self.test_complexity([Opt(OptOps.UPCAST, 0, 4)])
-  def
-  def
+  def test_complexity_w_upcast(self): return self.test_complexity([Opt(OptOps.UPCAST, 0, 4)], limit=1)
+  def test_complexity_w_unroll2(self): return self.test_complexity([Opt(OptOps.UNROLL, 0, 2)], limit=1)
+  def test_complexity_w_unroll4(self): return self.test_complexity([Opt(OptOps.UNROLL, 0, 4)], limit=1)
+  def test_complexity_w_unroll8(self): return self.test_complexity([Opt(OptOps.UNROLL, 0, 8)], limit=1)
+  def test_complexity_w_upcast_and_unroll(self): return self.test_complexity([Opt(OptOps.UPCAST, 0, 4), Opt(OptOps.UNROLL, 0, 4)], limit=1)
 
   @unittest.skip("doesn't work yet")
   def test_complexity_w_local_and_padto(self): return self.test_complexity([Opt(OptOps.LOCAL, 0, 16), Opt(op=OptOps.PADTO, axis=1, amt=32)])
@@ -125,23 +130,30 @@ class TestIndexing(unittest.TestCase):
   @unittest.skip("not ready")
   def test_index_fused_opt(self): self.test_index_fused(0)
 
+  def test_index_fused_out_of_bounds(self):
+    dataset = Tensor.rand(256, 256).realize()
+    idxs = Tensor([-19238, -257, 256, 495, 10982377]).realize()
+    with Context(NOOPT=1, FUSE_ARANGE=1):
+      X = dataset[idxs]
+    np.testing.assert_equal(X.numpy(), 0)
+
   @unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
-  def test_index_mnist(self, noopt=1):
+  def test_index_mnist(self, noopt=1, op_limit=512*784*5):
     from tinygrad.nn.datasets import mnist
     X_train, Y_train, _, _ = mnist()
     with Context(NOOPT=noopt, FUSE_ARANGE=1, SPLIT_REDUCEOP=0):
+      samples = Tensor.randint(getenv("BS", 512), high=X_train.shape[0]).realize()
       GlobalCounters.reset()
-      samples = Tensor.randint(getenv("BS", 512), high=X_train.shape[0])
       x = X_train[samples].numpy()
      y = Y_train[samples].numpy()
-      assert GlobalCounters.global_ops <
+      assert GlobalCounters.global_ops < op_limit, f"too many ops {GlobalCounters.global_ops} != {op_limit}"
     np.testing.assert_allclose(X_train.numpy()[samples.numpy()], x)
     np.testing.assert_allclose(Y_train.numpy()[samples.numpy()], y)
   @unittest.skip("not ready")
   def test_index_mnist_opt(self): self.test_index_mnist(0)
 
   @unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
-  def test_llama_embedding(self, noopt=1, op_limit=
+  def test_llama_embedding(self, noopt=1, op_limit=65536):
     # llama3 is 128256
     vocab_size, embed_size = (10, 3) if CI else (32000, 4096)
     emb = nn.Embedding(vocab_size, embed_size)
@@ -161,7 +173,7 @@ class TestIndexing(unittest.TestCase):
     # TODO: reshape to match torch, should we do this in nn?
     np.testing.assert_allclose(z.numpy().reshape(4, embed_size), torch_z.detach().numpy(), atol=1e-8, rtol=1e-8)
     # at least the arange is being fused
-  def test_llama_embedding_opt(self): self.test_llama_embedding(0,
+  def test_llama_embedding_opt(self): self.test_llama_embedding(0, 1_736_704_000 if CI else 5_898_240_000)
 
 if __name__ == "__main__":
   unittest.main()

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_assign.py

@@ -2,6 +2,7 @@
 import unittest
 import numpy as np
 from tinygrad import dtypes, Tensor, TinyJit, GlobalCounters, Variable
+from tinygrad.engine.schedule import create_schedule
 
 N = 200 # has to be bigger than the cache to fail
 
@@ -57,10 +58,12 @@ class TestAssign(unittest.TestCase):
     x.realize()
     x = Tensor([0])
     f(x)
-
+    out = x.item()
+    assert out == 1, f"expected 1, got {out}"
     x = Tensor([0])
     f(x)
-
+    out = x.item()
+    assert out == 1, f"expected 1, got {out}"
 
   def test_assign_add_jit(self):
     @TinyJit
@@ -165,6 +168,16 @@ class TestAssign(unittest.TestCase):
     a += 1
     np.testing.assert_allclose(a.numpy(), 3)
 
+  # NOTE: this is similar to the resnet failure
+  #@unittest.expectedFailure
+  def test_double_assign_alt(self):
+    a = Tensor.ones(4).contiguous().realize()
+    b = Tensor([1, 2, 3, 4]).realize().lazydata
+    a1 = a.lazydata.assign(b)
+    a2 = a.lazydata.assign(b)
+    sched = create_schedule([a1, a2])
+    self.assertEqual(len(sched), 1)
+
   def test_crossover_assign(self):
     a = Tensor.full((4,), 2).contiguous().realize()
     b = Tensor.full((4,), 3).contiguous().realize()
@@ -347,7 +360,7 @@ class TestAssign(unittest.TestCase):
 
   def test_permuted_assignment_masked_view_possible(self):
     a = Tensor.ones(4, 4).contiguous().realize()
-    b = a.shrink((None, (0, 2))).pad((None, (0, 2)), 2)
+    b = a.shrink((None, (0, 2))).pad((None, (0, 2)), value=2)
     a.assign(a + b)
     kc = GlobalCounters.kernel_count
     a.realize()
@@ -357,7 +370,7 @@ class TestAssign(unittest.TestCase):
   def test_permuted_assignment_masked_view_not_contiguous(self):
     a = Tensor.ones(4, 4).contiguous().realize()
     with self.assertRaisesRegex(RuntimeError, "contiguous"):
-      b = a.shrink((None, (0, 2))).pad((None, (0, 2)), 2).permute(1, 0)
+      b = a.shrink((None, (0, 2))).pad((None, (0, 2)), value=2).permute(1, 0)
       a.assign(a + b)
       a.realize()
 

tinygrad-0.10.0/test/test_compile_failures.py (new file)

@@ -0,0 +1,18 @@
+import unittest
+from tinygrad import Tensor, dtypes, Device
+from tinygrad.engine.realize import lower_schedule
+from tinygrad.device import is_dtype_supported
+
+class TestCompileFailures(unittest.TestCase):
+  def compile(self, out:Tensor):
+    for _ in lower_schedule(out.schedule()): pass
+
+  @unittest.skipUnless(is_dtype_supported(dtypes.uchar, Device.DEFAULT), f"no uint8 on {Device.DEFAULT}")
+  def test_interpolate_atari(self):
+    self.compile(Tensor.empty(210, 160, dtype='uint8').interpolate((64, 64)))
+
+  def test_add_max_uchar(self):
+    self.compile((Tensor.empty(1024, dtype='uint8') + Tensor.empty(1024, dtype='uint8')).max())
+
+if __name__ == '__main__':
+  unittest.main()

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_const_folding.py

@@ -1,15 +1,15 @@
 import unittest, math
 from tinygrad import Tensor, Device, dtypes
+from tinygrad.ops import Ops
 from tinygrad.engine.schedule import create_schedule
 from tinygrad.helpers import CI
-from tinygrad.ops import MetaOps
 import numpy as np
-from
+from tinygrad.device import is_dtype_supported
 
 def _check_ast_count(desired_count:int, t:Tensor):
   # NOTE: this has side effect because everything can be scheduled only once
   schedule = create_schedule(t.lazydata.lbs)
-  asts = [s for s in schedule if s.ast.op is
+  asts = [s for s in schedule if s.ast.op is Ops.SINK]
   assert len(asts) == desired_count
 
 class TestUnaryOpsConstFolding(unittest.TestCase):
@@ -23,6 +23,7 @@ class TestUnaryOpsConstFolding(unittest.TestCase):
     _check_ast_count(0, Tensor.ones(4).cast(dtypes.int16))
     _check_ast_count(0, Tensor.full(4, fill_value=-1).cast(dtypes.uint16))
 
+  @unittest.expectedFailure  # no two level fold at lazybuffer
   def test_neg_folding(self):
     _check_ast_count(0, Tensor([1, 2, 3]).mul(-1).neg())
     _check_ast_count(0, Tensor([1, 2, 3]).neg().mul(-1))
@@ -78,6 +79,11 @@ class TestBinaryOpsConstFolding(unittest.TestCase):
   def test_div_tensor_one(self):
     _check_ast_count(0, Tensor([1.0, 2, 3, 4]) / Tensor.ones(4))
 
+  def test_idiv_literal_one(self):
+    _check_ast_count(0, Tensor([1, 2, 3, 4]) // 1)
+  def test_idiv_tensor_one(self):
+    _check_ast_count(0, Tensor([1, 2, 3, 4]) // Tensor.ones(4, dtype=dtypes.int32))
+
   def test_pow_literal_zero(self):
     _check_ast_count(0, Tensor([1.0, 2, 3, 4]) ** 0)
   def test_pow_tensor_zero(self):
@@ -124,13 +130,16 @@ class TestMovedConstFolding(unittest.TestCase):
 
   def test_cast_padded(self):
     # NOTE: this is folded due to CAST_BEFORE_VIEW
-
-
-
-
+    if is_dtype_supported(dtypes.int16):
+      _check_ast_count(0, Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int16))
+      np.testing.assert_equal(Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int16).numpy(), [0, 1, 1, 1, 1, 0])
+    if is_dtype_supported(dtypes.uint16):
+      _check_ast_count(0, Tensor.full(4, fill_value=-1).pad(((1, 1),)).cast(dtypes.uint16))
+      np.testing.assert_equal(Tensor.full(4, fill_value=-1).pad(((1, 1),)).cast(dtypes.uint16).numpy(), [0, 65535, 65535, 65535, 65535, 0])
     # not folded
-
-
+    if is_dtype_supported(dtypes.int64):
+      _check_ast_count(1, Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int64))
+      np.testing.assert_equal(Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int64).numpy(), [0, 1, 1, 1, 1, 0])
 
 class TestReduceOpsConstFolding(unittest.TestCase):
   def test_const_sum(self):
@@ -145,10 +154,18 @@ class TestReduceOpsConstFolding(unittest.TestCase):
     _check_ast_count(1, Tensor.ones(4).pad(((1, 1),)).sum())
     np.testing.assert_equal(Tensor.ones(4).pad(((1, 1),)).sum().numpy(), 4)
 
-    # NOTE: cannot just count the non-padded area because some
+    # NOTE: cannot just count the non-padded area because some Ops f do not have f(0) = 0.
     _check_ast_count(1, Tensor.ones(4).pad(((1, 1),)).exp().sum())
     np.testing.assert_allclose(Tensor.ones(4).pad(((1, 1),)).exp().sum().numpy(), 4 * math.e + 2)
 
+  def test_const_prod(self):
+    _check_ast_count(0, Tensor.full((2, 3), fill_value=2).prod())
+    np.testing.assert_equal(Tensor.full((2, 3), fill_value=2).prod().numpy(), 2**(2*3))
+    _check_ast_count(0, Tensor.full((4, 5, 6), fill_value=2).prod(axis=0))
+    np.testing.assert_equal(Tensor.full((4, 5, 6), fill_value=2).prod(axis=0).numpy(), np.full((5, 6), 2**4))
+    _check_ast_count(0, Tensor(4).prod())
+    np.testing.assert_equal(Tensor(4).prod().numpy(), 4)
+
   def test_const_max(self):
     _check_ast_count(0, Tensor.ones(4, 5, 6).max())
     np.testing.assert_equal(Tensor.ones(4, 5, 6).max().numpy(), 1)
@@ -234,7 +251,6 @@ class TestTautologicalCompare(unittest.TestCase):
     np.testing.assert_equal((Tensor(True) < Tensor(False)).numpy(), False)
     np.testing.assert_equal((Tensor(True) < Tensor(True)).numpy(), False)
 
-  @unittest.skip("not implemented yet")
   def test_a_eq_a(self):
     # self eq is always true for int or bool
     a = Tensor([1, 2, 3])
@@ -244,7 +260,6 @@ class TestTautologicalCompare(unittest.TestCase):
     a = Tensor([math.nan, 1.0, 2.0])
     np.testing.assert_equal((a == a).numpy(), [False, True, True])
 
-  @unittest.skip("not implemented yet")
   def test_a_ne_a(self):
     # self not eq is always false for int or bool
     a = Tensor([1, 2, 3])

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_conv_shapetracker.py

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import unittest
+from tinygrad.ops import Ops
 from tinygrad.tensor import Tensor
-from tinygrad.ops import MetaOps, BufferOps
 from tinygrad.nn import Conv2d
 from tinygrad.engine.schedule import create_schedule
 from tinygrad.shape.shapetracker import ShapeTracker, View
@@ -11,25 +11,23 @@ from test.unit.test_shapetracker import shapetracker_getitem
 class TestConvShapetracker(unittest.TestCase):
   def test_conv_3x3_one_view(self):
     conv = Conv2d(16, 32, (3, 3))
-    seen = set()
 
-    # first run to init the weights, they are
-    create_schedule([conv(Tensor.empty(1, 16, 10, 10)).lazydata]
+    # first run to init the weights, they are scheduled.
+    create_schedule([conv(Tensor.empty(1, 16, 10, 10)).lazydata])
     # run it again to get the kernels
-    sched = [si for si in create_schedule([conv(Tensor.empty(1, 16, 10, 10)).lazydata]
+    sched = [si for si in create_schedule([conv(Tensor.empty(1, 16, 10, 10)).lazydata]) if si.ast.op is Ops.SINK]
     assert len(sched) == 1, f"conv should only have one kernel, getting {len(sched)}"
-    for st in [x.
+    for st in [x.st_arg for x in sched[0].ast.parents if x.op is Ops.LOAD]:
       assert len(st.views) == 1
 
-  @unittest.expectedFailure
   def test_conv_2x2_backward_one_view(self):
     X = Tensor.rand(1, 1, 3, 3, requires_grad=True)
     conv = Conv2d(1, 1, (2, 2), bias=False)
     conv(X).mean().backward()
     si = X.grad.schedule()[-1]
     print(si)
-    ldb = [x for x in si.ast.
-    st: ShapeTracker = ldb.
+    ldb = [x for x in si.ast.parents if x.op is Ops.LOAD][0]
+    st: ShapeTracker = ldb.st_arg.simplify()
     # NOTE: st.real_size() is broken
     print(si.inputs[0].size)
     #self.assertEqual(si.inputs[0].size, st.real_size())
@@ -53,11 +51,8 @@ class TestConvShapetracker(unittest.TestCase):
       print(i, i1, i2, si.inputs[0].size, i1==i2)
       #self.assertEqual(i1, i2)
 
-
-
-    print(s)
-    print(va)
-    assert len(st.views) <= 2
+    with self.assertRaises(AssertionError):
+      assert len(st.views) <= 2
 
 if __name__ == '__main__':
   unittest.main()

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_copy_speed.py

@@ -4,7 +4,7 @@ from tinygrad import Device
 from tinygrad.helpers import Timing, CI, OSX
 import multiprocessing.shared_memory as shared_memory
 
-N = 4096
+N = 4096
 class TestCopySpeed(unittest.TestCase):
   @classmethod
   def setUpClass(cls): Device[Device.DEFAULT].synchronize()

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_device_speed.py

@@ -1,13 +1,12 @@
 import unittest
 from tinygrad import Device
-from tinygrad.codegen.uopgraph import UOpGraph
 from tinygrad.helpers import Timing, Profiling
 
 class TestDeviceSpeed(unittest.TestCase):
   @classmethod
   def setUpClass(cls):
     cls.dev = Device[Device.DEFAULT]
-    cls.empty = Device[Device.DEFAULT].renderer.render("test",
+    cls.empty = Device[Device.DEFAULT].renderer.render("test", [])
 
   def test_empty_compile(self):
     with Timing("compiler "):