tinygrad 0.9.2__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. {tinygrad-0.9.2/tinygrad.egg-info → tinygrad-0.10.0}/PKG-INFO +10 -7
  2. {tinygrad-0.9.2 → tinygrad-0.10.0}/README.md +4 -1
  3. {tinygrad-0.9.2 → tinygrad-0.10.0}/setup.py +10 -8
  4. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_arange.py +21 -9
  5. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_assign.py +17 -4
  6. tinygrad-0.10.0/test/test_compile_failures.py +18 -0
  7. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_const_folding.py +27 -12
  8. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_conv_shapetracker.py +9 -14
  9. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_copy_speed.py +1 -1
  10. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_device_speed.py +1 -2
  11. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_dtype.py +133 -33
  12. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_dtype_alu.py +19 -44
  13. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_fusion_op.py +28 -9
  14. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_fuzz_shape_ops.py +2 -2
  15. tinygrad-0.10.0/test/test_gc.py +67 -0
  16. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_graph.py +1 -2
  17. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_hcq.py +25 -13
  18. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_image_dtype.py +31 -3
  19. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_jit.py +21 -2
  20. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_lazybuffer.py +34 -13
  21. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_linearizer.py +734 -637
  22. tinygrad-0.10.0/test/test_linearizer_dumb.py +223 -0
  23. tinygrad-0.10.0/test/test_linearizer_failures.py +1435 -0
  24. tinygrad-0.10.0/test/test_linearizer_overflows.py +196 -0
  25. tinygrad-0.10.0/test/test_metal.py +77 -0
  26. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_multitensor.py +110 -21
  27. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_nn.py +219 -33
  28. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_ocl.py +11 -0
  29. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_ops.py +433 -56
  30. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_optim.py +1 -1
  31. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_pickle.py +27 -3
  32. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_profiler.py +4 -3
  33. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_randomness.py +146 -28
  34. tinygrad-0.10.0/test/test_rearrange_einops.py +321 -0
  35. tinygrad-0.10.0/test/test_renderer_failures.py +68 -0
  36. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_sample.py +1 -2
  37. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_schedule.py +462 -192
  38. tinygrad-0.10.0/test/test_search.py +158 -0
  39. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_setitem.py +23 -8
  40. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_specific_conv.py +1 -1
  41. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_speed_v_torch.py +4 -2
  42. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_subbuffer.py +1 -1
  43. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_symbolic_jit.py +1 -3
  44. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_symbolic_ops.py +2 -2
  45. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_symbolic_shapetracker.py +37 -40
  46. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_tensor.py +84 -38
  47. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_tensor_variable.py +36 -20
  48. tinygrad-0.10.0/test/test_tiny.py +84 -0
  49. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_transcendental.py +59 -9
  50. tinygrad-0.10.0/test/test_uop_graph.py +716 -0
  51. tinygrad-0.10.0/test/test_uops.py +454 -0
  52. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_uops_stats.py +45 -24
  53. tinygrad-0.10.0/test/test_viz.py +93 -0
  54. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_winograd.py +3 -3
  55. tinygrad-0.10.0/tinygrad/__init__.py +11 -0
  56. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/codegen/kernel.py +214 -214
  57. tinygrad-0.10.0/tinygrad/codegen/linearize.py +95 -0
  58. tinygrad-0.10.0/tinygrad/codegen/lowerer.py +143 -0
  59. tinygrad-0.10.0/tinygrad/codegen/transcendental.py +257 -0
  60. tinygrad-0.10.0/tinygrad/codegen/uopgraph.py +506 -0
  61. tinygrad-0.10.0/tinygrad/device.py +221 -0
  62. tinygrad-0.10.0/tinygrad/dtype.py +188 -0
  63. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/engine/jit.py +57 -38
  64. {tinygrad-0.9.2/tinygrad → tinygrad-0.10.0/tinygrad/engine}/lazy.py +67 -58
  65. tinygrad-0.10.0/tinygrad/engine/memory.py +51 -0
  66. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/engine/realize.py +23 -74
  67. tinygrad-0.10.0/tinygrad/engine/schedule.py +419 -0
  68. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/engine/search.py +33 -29
  69. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/function.py +53 -61
  70. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/helpers.py +88 -108
  71. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/multi.py +53 -54
  72. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/nn/__init__.py +85 -53
  73. tinygrad-0.10.0/tinygrad/nn/datasets.py +15 -0
  74. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/nn/optim.py +1 -1
  75. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/nn/state.py +89 -4
  76. tinygrad-0.10.0/tinygrad/ops.py +1152 -0
  77. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/renderer/__init__.py +21 -19
  78. tinygrad-0.10.0/tinygrad/renderer/cstyle.py +462 -0
  79. tinygrad-0.10.0/tinygrad/renderer/llvmir.py +142 -0
  80. tinygrad-0.10.0/tinygrad/renderer/ptx.py +225 -0
  81. tinygrad-0.10.0/tinygrad/runtime/autogen/adreno.py +17904 -0
  82. tinygrad-0.10.0/tinygrad/runtime/autogen/amd_gpu.py +48384 -0
  83. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/io_uring.py +97 -63
  84. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/kfd.py +60 -79
  85. tinygrad-0.10.0/tinygrad/runtime/autogen/kgsl.py +1386 -0
  86. tinygrad-0.10.0/tinygrad/runtime/autogen/libc.py +5462 -0
  87. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/nv_gpu.py +1976 -1957
  88. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/opencl.py +11 -11
  89. tinygrad-0.10.0/tinygrad/runtime/autogen/qcom_dsp.py +1739 -0
  90. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/graph/clang.py +1 -1
  91. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/graph/cuda.py +3 -4
  92. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/graph/hcq.py +5 -5
  93. tinygrad-0.10.0/tinygrad/runtime/graph/metal.py +103 -0
  94. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_amd.py +86 -57
  95. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_clang.py +11 -4
  96. tinygrad-0.10.0/tinygrad/runtime/ops_cloud.py +220 -0
  97. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_cuda.py +8 -7
  98. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_disk.py +25 -22
  99. tinygrad-0.10.0/tinygrad/runtime/ops_dsp.py +181 -0
  100. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_gpu.py +26 -15
  101. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_hip.py +3 -5
  102. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_llvm.py +15 -10
  103. tinygrad-0.10.0/tinygrad/runtime/ops_metal.py +188 -0
  104. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_nv.py +135 -96
  105. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_python.py +65 -62
  106. tinygrad-0.10.0/tinygrad/runtime/ops_qcom.py +405 -0
  107. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/support/compiler_cuda.py +6 -7
  108. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/support/compiler_hip.py +3 -5
  109. tinygrad-0.9.2/tinygrad/device.py → tinygrad-0.10.0/tinygrad/runtime/support/hcq.py +66 -206
  110. tinygrad-0.10.0/tinygrad/shape/__init__.py +0 -0
  111. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/shape/shapetracker.py +38 -39
  112. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/shape/view.py +79 -52
  113. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/tensor.py +778 -336
  114. {tinygrad-0.9.2 → tinygrad-0.10.0/tinygrad.egg-info}/PKG-INFO +10 -7
  115. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad.egg-info/SOURCES.txt +17 -9
  116. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad.egg-info/requires.txt +4 -6
  117. tinygrad-0.9.2/test/test_custom_function.py +0 -106
  118. tinygrad-0.9.2/test/test_gc.py +0 -37
  119. tinygrad-0.9.2/test/test_lazyop.py +0 -34
  120. tinygrad-0.9.2/test/test_linearizer_dumb.py +0 -104
  121. tinygrad-0.9.2/test/test_linearizer_failures.py +0 -467
  122. tinygrad-0.9.2/test/test_linearizer_overflows.py +0 -89
  123. tinygrad-0.9.2/test/test_pattern_matcher.py +0 -186
  124. tinygrad-0.9.2/test/test_renderer_failures.py +0 -43
  125. tinygrad-0.9.2/test/test_search.py +0 -112
  126. tinygrad-0.9.2/test/test_uop_graph.py +0 -662
  127. tinygrad-0.9.2/test/test_uops.py +0 -379
  128. tinygrad-0.9.2/test/test_verify_lazyop.py +0 -76
  129. tinygrad-0.9.2/tinygrad/__init__.py +0 -6
  130. tinygrad-0.9.2/tinygrad/codegen/lowerer.py +0 -215
  131. tinygrad-0.9.2/tinygrad/codegen/transcendental.py +0 -310
  132. tinygrad-0.9.2/tinygrad/codegen/uopgraph.py +0 -622
  133. tinygrad-0.9.2/tinygrad/codegen/uops.py +0 -293
  134. tinygrad-0.9.2/tinygrad/dtype.py +0 -127
  135. tinygrad-0.9.2/tinygrad/engine/graph.py +0 -87
  136. tinygrad-0.9.2/tinygrad/engine/schedule.py +0 -413
  137. tinygrad-0.9.2/tinygrad/nn/datasets.py +0 -8
  138. tinygrad-0.9.2/tinygrad/ops.py +0 -170
  139. tinygrad-0.9.2/tinygrad/renderer/assembly.py +0 -267
  140. tinygrad-0.9.2/tinygrad/renderer/cstyle.py +0 -416
  141. tinygrad-0.9.2/tinygrad/renderer/llvmir.py +0 -151
  142. tinygrad-0.9.2/tinygrad/runtime/autogen/amd_gpu.py +0 -32858
  143. tinygrad-0.9.2/tinygrad/runtime/autogen/libc.py +0 -4260
  144. tinygrad-0.9.2/tinygrad/runtime/graph/metal.py +0 -78
  145. tinygrad-0.9.2/tinygrad/runtime/ops_metal.py +0 -116
  146. tinygrad-0.9.2/tinygrad/shape/symbolic.py +0 -323
  147. {tinygrad-0.9.2 → tinygrad-0.10.0}/LICENSE +0 -0
  148. {tinygrad-0.9.2 → tinygrad-0.10.0}/setup.cfg +0 -0
  149. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_conv.py +0 -0
  150. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_kernel_cache.py +0 -0
  151. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_masked_st.py +0 -0
  152. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_method_cache.py +0 -0
  153. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_net_speed.py +0 -0
  154. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_tensor_data.py +0 -0
  155. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_to_numpy.py +0 -0
  156. {tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_zero_copy.py +0 -0
  157. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/codegen/__init__.py +0 -0
  158. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/engine/__init__.py +0 -0
  159. /tinygrad-0.9.2/tinygrad/runtime/__init__.py → /tinygrad-0.10.0/tinygrad/py.typed +0 -0
  160. {tinygrad-0.9.2/tinygrad/runtime/graph → tinygrad-0.10.0/tinygrad/runtime}/__init__.py +0 -0
  161. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/comgr.py +0 -0
  162. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/cuda.py +0 -0
  163. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/hip.py +0 -0
  164. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/hsa.py +0 -0
  165. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/autogen/nvrtc.py +0 -0
  166. {tinygrad-0.9.2/tinygrad/runtime/support → tinygrad-0.10.0/tinygrad/runtime/graph}/__init__.py +0 -0
  167. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/ops_npy.py +0 -0
  168. {tinygrad-0.9.2/tinygrad/shape → tinygrad-0.10.0/tinygrad/runtime/support}/__init__.py +0 -0
  169. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad/runtime/support/elf.py +0 -0
  170. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad.egg-info/dependency_links.txt +0 -0
  171. {tinygrad-0.9.2 → tinygrad-0.10.0}/tinygrad.egg-info/top_level.txt +0 -0

{tinygrad-0.9.2/tinygrad.egg-info → tinygrad-0.10.0}/PKG-INFO

@@ -1,17 +1,14 @@
 Metadata-Version: 2.1
 Name: tinygrad
-Version: 0.9.2
+Version: 0.10.0
 Summary: You like pytorch? You like micrograd? You love tinygrad! <3
 Author: George Hotz
 License: MIT
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
-Requires-Python: >=3.8
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: numpy
-Requires-Dist: pyobjc-framework-Metal; platform_system == "Darwin"
-Requires-Dist: pyobjc-framework-libdispatch; platform_system == "Darwin"
 Provides-Extra: llvm
 Requires-Dist: llvmlite; extra == "llvm"
 Provides-Extra: arm
@@ -20,12 +17,13 @@ Provides-Extra: triton
 Requires-Dist: triton-nightly>=2.1.0.dev20231014192330; extra == "triton"
 Provides-Extra: linting
 Requires-Dist: pylint; extra == "linting"
-Requires-Dist: mypy; extra == "linting"
+Requires-Dist: mypy==1.11.2; extra == "linting"
 Requires-Dist: typing-extensions; extra == "linting"
 Requires-Dist: pre-commit; extra == "linting"
 Requires-Dist: ruff; extra == "linting"
 Requires-Dist: types-tqdm; extra == "linting"
 Provides-Extra: testing
+Requires-Dist: numpy; extra == "testing"
 Requires-Dist: torch; extra == "testing"
 Requires-Dist: pillow; extra == "testing"
 Requires-Dist: pytest; extra == "testing"
@@ -45,6 +43,7 @@ Requires-Dist: networkx; extra == "testing"
 Requires-Dist: hypothesis; extra == "testing"
 Requires-Dist: nibabel; extra == "testing"
 Requires-Dist: bottle; extra == "testing"
+Requires-Dist: ggml-python; extra == "testing"
 Provides-Extra: docs
 Requires-Dist: mkdocs; extra == "docs"
 Requires-Dist: mkdocs-material; extra == "docs"
@@ -52,6 +51,7 @@ Requires-Dist: mkdocstrings[python]; extra == "docs"
 Requires-Dist: markdown-callouts; extra == "docs"
 Requires-Dist: markdown-exec[ansi]; extra == "docs"
 Requires-Dist: black; extra == "docs"
+Requires-Dist: numpy; extra == "docs"
 Provides-Extra: testing-tf
 Requires-Dist: tensorflow==2.15.1; extra == "testing-tf"
 Requires-Dist: tensorflow_addons; extra == "testing-tf"
@@ -145,9 +145,12 @@ tinygrad already supports numerous accelerators, including:
 - [x] [CUDA](tinygrad/runtime/ops_cuda.py)
 - [x] [AMD](tinygrad/runtime/ops_amd.py)
 - [x] [NV](tinygrad/runtime/ops_nv.py)
+- [x] [QCOM](tinygrad/runtime/ops_qcom.py)
 
 And it is easy to add more! Your accelerator of choice only needs to support a total of ~25 low level ops.
 
+To check default accelerator run: `python3 -c "from tinygrad import Device; print(Device.DEFAULT)"`
+
 ## Installation
 
 The current recommended way to install tinygrad is from source.
@@ -233,4 +236,4 @@ python3 -m pytest test/ # whole test suite
 
 #### Process replay tests
 
-[Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/process_replay.py) compares your PR's generated kernels against master. If your PR is a refactor or speedup without any expected behavior change, It should include [run_process_replay] in the PR title, [example](https://github.com/tinygrad/tinygrad/pull/4995). Note that you should keep your branch up-to-date with master.
+[Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/README.md) compares your PR's generated kernels against master. If your PR is a refactor or speedup without any expected behavior change, It should include [pr] in the pull request title.

{tinygrad-0.9.2 → tinygrad-0.10.0}/README.md

@@ -87,9 +87,12 @@ tinygrad already supports numerous accelerators, including:
 - [x] [CUDA](tinygrad/runtime/ops_cuda.py)
 - [x] [AMD](tinygrad/runtime/ops_amd.py)
 - [x] [NV](tinygrad/runtime/ops_nv.py)
+- [x] [QCOM](tinygrad/runtime/ops_qcom.py)
 
 And it is easy to add more! Your accelerator of choice only needs to support a total of ~25 low level ops.
 
+To check default accelerator run: `python3 -c "from tinygrad import Device; print(Device.DEFAULT)"`
+
 ## Installation
 
 The current recommended way to install tinygrad is from source.
@@ -175,4 +178,4 @@ python3 -m pytest test/ # whole test suite
 
 #### Process replay tests
 
-[Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/process_replay.py) compares your PR's generated kernels against master. If your PR is a refactor or speedup without any expected behavior change, It should include [run_process_replay] in the PR title, [example](https://github.com/tinygrad/tinygrad/pull/4995). Note that you should keep your branch up-to-date with master.
+[Process replay](https://github.com/tinygrad/tinygrad/blob/master/test/external/process_replay/README.md) compares your PR's generated kernels against master. If your PR is a refactor or speedup without any expected behavior change, It should include [pr] in the pull request title.
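
Expanded as a plain script, the accelerator check added to the README above is just the following; the printed backend name (e.g. METAL, CUDA, or CLANG) depends on the machine and is illustrative only:

    from tinygrad import Device
    print(Device.DEFAULT)  # e.g. "METAL", "CUDA", or "CLANG" depending on the machine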

{tinygrad-0.9.2 → tinygrad-0.10.0}/setup.py

@@ -8,7 +8,7 @@ with open(directory / 'README.md', encoding='utf-8') as f:
   long_description = f.read()
 
 setup(name='tinygrad',
-      version='0.9.2',
+      version='0.10.0',
       description='You like pytorch? You like micrograd? You love tinygrad! <3',
       author='George Hotz',
       license='MIT',
@@ -16,28 +16,28 @@ setup(name='tinygrad',
       long_description_content_type='text/markdown',
       packages = ['tinygrad', 'tinygrad.runtime.autogen', 'tinygrad.codegen', 'tinygrad.nn', 'tinygrad.renderer', 'tinygrad.engine',
                   'tinygrad.runtime', 'tinygrad.runtime.support', 'tinygrad.runtime.graph', 'tinygrad.shape'],
+      package_data = {'tinygrad': ['py.typed']},
       classifiers=[
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: MIT License"
       ],
-      install_requires=["numpy",
-                        "pyobjc-framework-Metal; platform_system=='Darwin'",
-                        "pyobjc-framework-libdispatch; platform_system=='Darwin'"],
-      python_requires='>=3.8',
+      install_requires=[],
+      python_requires='>=3.10',
       extras_require={
        'llvm': ["llvmlite"],
        'arm': ["unicorn"],
        'triton': ["triton-nightly>=2.1.0.dev20231014192330"],
        'linting': [
            "pylint",
-           "mypy",
+           "mypy==1.11.2",
            "typing-extensions",
            "pre-commit",
            "ruff",
            "types-tqdm",
        ],
-       #'mlperf': ["mlperf-logging @ git+https://github.com/mlperf/logging.git@4.0.0-rc2"],
+       #'mlperf': ["mlperf-logging @ git+https://github.com/mlperf/logging.git@4.1.0-rc3"],
        'testing': [
+           "numpy",
            "torch",
            "pillow",
            "pytest",
@@ -57,6 +57,7 @@ setup(name='tinygrad',
            "hypothesis",
            "nibabel",
            "bottle",
+           "ggml-python"
        ],
        'docs': [
            "mkdocs",
@@ -64,7 +65,8 @@ setup(name='tinygrad',
            "mkdocstrings[python]",
            "markdown-callouts",
            "markdown-exec[ansi]",
-           "black"
+           "black",
+           "numpy",
        ],
        'testing_tf': [
            "tensorflow==2.15.1",

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_arange.py

@@ -23,16 +23,21 @@ class TestArange(unittest.TestCase):
     np.testing.assert_equal(tt.numpy(), np.arange(N))
     return p.op_estimate
 
-  def test_complexity(self, opts=None):
+  def test_complexity(self, opts=None, limit=None):
     # add 1 to avoid divide by 0. arange is 0 flops now!
     f1 = self._get_flops(256, opts) + 1
     f2 = self._get_flops(2560, opts) + 1
     print(f"{f1=}, {f2=}")
     assert (f1 < 5000 and f2 < 5000) or (f2 / f1 < 15), f"bad complexity, flops {f2/f1:.1f}X while inputs 10X"
+    if limit is not None and not getenv("PTX"):
+      # PTX counts index ALU in flops
+      assert f1 <= limit, f"{f1=}, {limit=}"
 
-  def test_complexity_w_upcast(self): return self.test_complexity([Opt(OptOps.UPCAST, 0, 4)])
-  def test_complexity_w_unroll(self): return self.test_complexity([Opt(OptOps.UNROLL, 0, 4)])
-  def test_complexity_w_upcast_and_unroll(self): return self.test_complexity([Opt(OptOps.UPCAST, 0, 4), Opt(OptOps.UNROLL, 0, 4)])
+  def test_complexity_w_upcast(self): return self.test_complexity([Opt(OptOps.UPCAST, 0, 4)], limit=1)
+  def test_complexity_w_unroll2(self): return self.test_complexity([Opt(OptOps.UNROLL, 0, 2)], limit=1)
+  def test_complexity_w_unroll4(self): return self.test_complexity([Opt(OptOps.UNROLL, 0, 4)], limit=1)
+  def test_complexity_w_unroll8(self): return self.test_complexity([Opt(OptOps.UNROLL, 0, 8)], limit=1)
+  def test_complexity_w_upcast_and_unroll(self): return self.test_complexity([Opt(OptOps.UPCAST, 0, 4), Opt(OptOps.UNROLL, 0, 4)], limit=1)
 
   @unittest.skip("doesn't work yet")
   def test_complexity_w_local_and_padto(self): return self.test_complexity([Opt(OptOps.LOCAL, 0, 16), Opt(op=OptOps.PADTO, axis=1, amt=32)])
@@ -125,23 +130,30 @@ class TestIndexing(unittest.TestCase):
   @unittest.skip("not ready")
   def test_index_fused_opt(self): self.test_index_fused(0)
 
+  def test_index_fused_out_of_bounds(self):
+    dataset = Tensor.rand(256, 256).realize()
+    idxs = Tensor([-19238, -257, 256, 495, 10982377]).realize()
+    with Context(NOOPT=1, FUSE_ARANGE=1):
+      X = dataset[idxs]
+    np.testing.assert_equal(X.numpy(), 0)
+
   @unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
-  def test_index_mnist(self, noopt=1):
+  def test_index_mnist(self, noopt=1, op_limit=512*784*5):
     from tinygrad.nn.datasets import mnist
     X_train, Y_train, _, _ = mnist()
     with Context(NOOPT=noopt, FUSE_ARANGE=1, SPLIT_REDUCEOP=0):
+      samples = Tensor.randint(getenv("BS", 512), high=X_train.shape[0]).realize()
       GlobalCounters.reset()
-      samples = Tensor.randint(getenv("BS", 512), high=X_train.shape[0])
       x = X_train[samples].numpy()
       y = Y_train[samples].numpy()
-      assert GlobalCounters.global_ops < 4*16384, f"too many ops {GlobalCounters.global_ops} != {4*16384}"
+      assert GlobalCounters.global_ops < op_limit, f"too many ops {GlobalCounters.global_ops} != {op_limit}"
      np.testing.assert_allclose(X_train.numpy()[samples.numpy()], x)
      np.testing.assert_allclose(Y_train.numpy()[samples.numpy()], y)
   @unittest.skip("not ready")
   def test_index_mnist_opt(self): self.test_index_mnist(0)
 
   @unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
-  def test_llama_embedding(self, noopt=1, op_limit=0):
+  def test_llama_embedding(self, noopt=1, op_limit=65536):
     # llama3 is 128256
     vocab_size, embed_size = (10, 3) if CI else (32000, 4096)
     emb = nn.Embedding(vocab_size, embed_size)
@@ -161,7 +173,7 @@ class TestIndexing(unittest.TestCase):
     # TODO: reshape to match torch, should we do this in nn?
     np.testing.assert_allclose(z.numpy().reshape(4, embed_size), torch_z.detach().numpy(), atol=1e-8, rtol=1e-8)
   # at least the arange is being fused
-  def test_llama_embedding_opt(self): self.test_llama_embedding(0, 1736704000)
+  def test_llama_embedding_opt(self): self.test_llama_embedding(0, 1_736_704_000 if CI else 5_898_240_000)
 
 if __name__ == "__main__":
   unittest.main()
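
The new indexing tests above exercise fused-arange gathering under a Context override. A minimal standalone sketch of the same pattern, assuming Context is still importable from tinygrad.helpers as in earlier releases; the shapes and indices here are arbitrary:

    import numpy as np
    from tinygrad import Tensor
    from tinygrad.helpers import Context

    dataset = Tensor.rand(256, 256).realize()
    idxs = Tensor([3, 7, 250]).realize()
    with Context(NOOPT=1, FUSE_ARANGE=1):  # the same flags the tests toggle
      picked = dataset[idxs]
    np.testing.assert_allclose(picked.numpy(), dataset.numpy()[[3, 7, 250]])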

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_assign.py

@@ -2,6 +2,7 @@
 import unittest
 import numpy as np
 from tinygrad import dtypes, Tensor, TinyJit, GlobalCounters, Variable
+from tinygrad.engine.schedule import create_schedule
 
 N = 200 # has to be bigger than the cache to fail
 
@@ -57,10 +58,12 @@ class TestAssign(unittest.TestCase):
     x.realize()
     x = Tensor([0])
     f(x)
-    assert (out:=x.item()) == 1, f"expected 1, got {out}"
+    out = x.item()
+    assert out == 1, f"expected 1, got {out}"
     x = Tensor([0])
     f(x)
-    assert (out:=x.item()) == 1, f"expected 1, got {out}"
+    out = x.item()
+    assert out == 1, f"expected 1, got {out}"
 
   def test_assign_add_jit(self):
     @TinyJit
@@ -165,6 +168,16 @@ class TestAssign(unittest.TestCase):
     a += 1
     np.testing.assert_allclose(a.numpy(), 3)
 
+  # NOTE: this is similar to the resnet failure
+  #@unittest.expectedFailure
+  def test_double_assign_alt(self):
+    a = Tensor.ones(4).contiguous().realize()
+    b = Tensor([1, 2, 3, 4]).realize().lazydata
+    a1 = a.lazydata.assign(b)
+    a2 = a.lazydata.assign(b)
+    sched = create_schedule([a1, a2])
+    self.assertEqual(len(sched), 1)
+
   def test_crossover_assign(self):
     a = Tensor.full((4,), 2).contiguous().realize()
     b = Tensor.full((4,), 3).contiguous().realize()
@@ -347,7 +360,7 @@ class TestAssign(unittest.TestCase):
 
   def test_permuted_assignment_masked_view_possible(self):
     a = Tensor.ones(4, 4).contiguous().realize()
-    b = a.shrink((None, (0, 2))).pad((None, (0, 2)), 2)
+    b = a.shrink((None, (0, 2))).pad((None, (0, 2)), value=2)
     a.assign(a + b)
     kc = GlobalCounters.kernel_count
     a.realize()
@@ -357,7 +370,7 @@ class TestAssign(unittest.TestCase):
   def test_permuted_assignment_masked_view_not_contiguous(self):
     a = Tensor.ones(4, 4).contiguous().realize()
     with self.assertRaisesRegex(RuntimeError, "contiguous"):
-      b = a.shrink((None, (0, 2))).pad((None, (0, 2)), 2).permute(1, 0)
+      b = a.shrink((None, (0, 2))).pad((None, (0, 2)), value=2).permute(1, 0)
       a.assign(a + b)
       a.realize()
 
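
Two of the edits above switch Tensor.pad to pass its fill value via the value= keyword instead of positionally. The call in isolation, with the shapes copied from the test (the print is only for illustration):

    from tinygrad import Tensor

    a = Tensor.ones(4, 4).contiguous().realize()
    # keep 2 columns, then pad back to 4 columns filled with 2 (keyword form used in 0.10.0)
    b = a.shrink((None, (0, 2))).pad((None, (0, 2)), value=2)
    print(b.numpy())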

tinygrad-0.10.0/test/test_compile_failures.py (new file)

@@ -0,0 +1,18 @@
+import unittest
+from tinygrad import Tensor, dtypes, Device
+from tinygrad.engine.realize import lower_schedule
+from tinygrad.device import is_dtype_supported
+
+class TestCompileFailures(unittest.TestCase):
+  def compile(self, out:Tensor):
+    for _ in lower_schedule(out.schedule()): pass
+
+  @unittest.skipUnless(is_dtype_supported(dtypes.uchar, Device.DEFAULT), f"no uint8 on {Device.DEFAULT}")
+  def test_interpolate_atari(self):
+    self.compile(Tensor.empty(210, 160, dtype='uint8').interpolate((64, 64)))
+
+  def test_add_max_uchar(self):
+    self.compile((Tensor.empty(1024, dtype='uint8') + Tensor.empty(1024, dtype='uint8')).max())
+
+if __name__ == '__main__':
+  unittest.main()

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_const_folding.py

@@ -1,15 +1,15 @@
 import unittest, math
 from tinygrad import Tensor, Device, dtypes
+from tinygrad.ops import Ops
 from tinygrad.engine.schedule import create_schedule
 from tinygrad.helpers import CI
-from tinygrad.ops import MetaOps
 import numpy as np
-from test.helpers import is_dtype_supported
+from tinygrad.device import is_dtype_supported
 
 def _check_ast_count(desired_count:int, t:Tensor):
   # NOTE: this has side effect because everything can be scheduled only once
   schedule = create_schedule(t.lazydata.lbs)
-  asts = [s for s in schedule if s.ast.op is MetaOps.KERNEL]
+  asts = [s for s in schedule if s.ast.op is Ops.SINK]
   assert len(asts) == desired_count
 
 class TestUnaryOpsConstFolding(unittest.TestCase):
@@ -23,6 +23,7 @@ class TestUnaryOpsConstFolding(unittest.TestCase):
     _check_ast_count(0, Tensor.ones(4).cast(dtypes.int16))
     _check_ast_count(0, Tensor.full(4, fill_value=-1).cast(dtypes.uint16))
 
+  @unittest.expectedFailure # no two level fold at lazybuffer
   def test_neg_folding(self):
     _check_ast_count(0, Tensor([1, 2, 3]).mul(-1).neg())
     _check_ast_count(0, Tensor([1, 2, 3]).neg().mul(-1))
@@ -78,6 +79,11 @@ class TestBinaryOpsConstFolding(unittest.TestCase):
   def test_div_tensor_one(self):
     _check_ast_count(0, Tensor([1.0, 2, 3, 4]) / Tensor.ones(4))
 
+  def test_idiv_literal_one(self):
+    _check_ast_count(0, Tensor([1, 2, 3, 4]) // 1)
+  def test_idiv_tensor_one(self):
+    _check_ast_count(0, Tensor([1, 2, 3, 4]) // Tensor.ones(4, dtype=dtypes.int32))
+
   def test_pow_literal_zero(self):
     _check_ast_count(0, Tensor([1.0, 2, 3, 4]) ** 0)
   def test_pow_tensor_zero(self):
@@ -124,13 +130,16 @@ class TestMovedConstFolding(unittest.TestCase):
 
   def test_cast_padded(self):
     # NOTE: this is folded due to CAST_BEFORE_VIEW
-    _check_ast_count(0, Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int16))
-    np.testing.assert_equal(Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int16).numpy(), [0, 1, 1, 1, 1, 0])
-    _check_ast_count(0, Tensor.full(4, fill_value=-1).pad(((1, 1),)).cast(dtypes.uint16))
-    np.testing.assert_equal(Tensor.full(4, fill_value=-1).pad(((1, 1),)).cast(dtypes.uint16).numpy(), [0, 65535, 65535, 65535, 65535, 0])
+    if is_dtype_supported(dtypes.int16):
+      _check_ast_count(0, Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int16))
+      np.testing.assert_equal(Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int16).numpy(), [0, 1, 1, 1, 1, 0])
+    if is_dtype_supported(dtypes.uint16):
+      _check_ast_count(0, Tensor.full(4, fill_value=-1).pad(((1, 1),)).cast(dtypes.uint16))
+      np.testing.assert_equal(Tensor.full(4, fill_value=-1).pad(((1, 1),)).cast(dtypes.uint16).numpy(), [0, 65535, 65535, 65535, 65535, 0])
     # not folded
-    _check_ast_count(1, Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int64))
-    np.testing.assert_equal(Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int64).numpy(), [0, 1, 1, 1, 1, 0])
+    if is_dtype_supported(dtypes.int64):
+      _check_ast_count(1, Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int64))
+      np.testing.assert_equal(Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int64).numpy(), [0, 1, 1, 1, 1, 0])
 
 class TestReduceOpsConstFolding(unittest.TestCase):
   def test_const_sum(self):
@@ -145,10 +154,18 @@ class TestReduceOpsConstFolding(unittest.TestCase):
     _check_ast_count(1, Tensor.ones(4).pad(((1, 1),)).sum())
     np.testing.assert_equal(Tensor.ones(4).pad(((1, 1),)).sum().numpy(), 4)
 
-    # NOTE: cannot just count the non-padded area because some UnaryOps f do not have f(0) = 0.
+    # NOTE: cannot just count the non-padded area because some Ops f do not have f(0) = 0.
     _check_ast_count(1, Tensor.ones(4).pad(((1, 1),)).exp().sum())
     np.testing.assert_allclose(Tensor.ones(4).pad(((1, 1),)).exp().sum().numpy(), 4 * math.e + 2)
 
+  def test_const_prod(self):
+    _check_ast_count(0, Tensor.full((2, 3), fill_value=2).prod())
+    np.testing.assert_equal(Tensor.full((2, 3), fill_value=2).prod().numpy(), 2**(2*3))
+    _check_ast_count(0, Tensor.full((4, 5, 6), fill_value=2).prod(axis=0))
+    np.testing.assert_equal(Tensor.full((4, 5, 6), fill_value=2).prod(axis=0).numpy(), np.full((5, 6), 2**4))
+    _check_ast_count(0, Tensor(4).prod())
+    np.testing.assert_equal(Tensor(4).prod().numpy(), 4)
+
   def test_const_max(self):
     _check_ast_count(0, Tensor.ones(4, 5, 6).max())
     np.testing.assert_equal(Tensor.ones(4, 5, 6).max().numpy(), 1)
@@ -234,7 +251,6 @@ class TestTautologicalCompare(unittest.TestCase):
     np.testing.assert_equal((Tensor(True) < Tensor(False)).numpy(), False)
     np.testing.assert_equal((Tensor(True) < Tensor(True)).numpy(), False)
 
-  @unittest.skip("not implemented yet")
   def test_a_eq_a(self):
     # self eq is always true for int or bool
     a = Tensor([1, 2, 3])
@@ -244,7 +260,6 @@ class TestTautologicalCompare(unittest.TestCase):
     a = Tensor([math.nan, 1.0, 2.0])
     np.testing.assert_equal((a == a).numpy(), [False, True, True])
 
-  @unittest.skip("not implemented yet")
   def test_a_ne_a(self):
     # self not eq is always false for int or bool
     a = Tensor([1, 2, 3])
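
These tests now import is_dtype_supported from tinygrad.device (it previously came from test.helpers) and use it to guard dtype-specific assertions. A hedged usage sketch; the optional device argument mirrors its use in test_compile_failures above:

    from tinygrad import Device, dtypes
    from tinygrad.device import is_dtype_supported

    for dt in (dtypes.int16, dtypes.uint16, dtypes.int64):
      print(dt, is_dtype_supported(dt, Device.DEFAULT))  # False means the guarded assertions are skipped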

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_conv_shapetracker.py

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import unittest
+from tinygrad.ops import Ops
 from tinygrad.tensor import Tensor
-from tinygrad.ops import MetaOps, BufferOps
 from tinygrad.nn import Conv2d
 from tinygrad.engine.schedule import create_schedule
 from tinygrad.shape.shapetracker import ShapeTracker, View
@@ -11,25 +11,23 @@ from test.unit.test_shapetracker import shapetracker_getitem
 class TestConvShapetracker(unittest.TestCase):
   def test_conv_3x3_one_view(self):
     conv = Conv2d(16, 32, (3, 3))
-    seen = set()
 
-    # first run to init the weights, they are saved in seen
-    create_schedule([conv(Tensor.empty(1, 16, 10, 10)).lazydata], seen)
+    # first run to init the weights, they are scheduled.
+    create_schedule([conv(Tensor.empty(1, 16, 10, 10)).lazydata])
     # run it again to get the kernels
-    sched = [si for si in create_schedule([conv(Tensor.empty(1, 16, 10, 10)).lazydata], seen) if si.ast.op is MetaOps.KERNEL]
+    sched = [si for si in create_schedule([conv(Tensor.empty(1, 16, 10, 10)).lazydata]) if si.ast.op is Ops.SINK]
     assert len(sched) == 1, f"conv should only have one kernel, getting {len(sched)}"
-    for st in [x.arg.st for x in sched[0].ast.lazyops if x.op is BufferOps.LOAD]:
+    for st in [x.st_arg for x in sched[0].ast.parents if x.op is Ops.LOAD]:
       assert len(st.views) == 1
 
-  @unittest.expectedFailure
   def test_conv_2x2_backward_one_view(self):
     X = Tensor.rand(1, 1, 3, 3, requires_grad=True)
     conv = Conv2d(1, 1, (2, 2), bias=False)
     conv(X).mean().backward()
     si = X.grad.schedule()[-1]
     print(si)
-    ldb = [x for x in si.ast.lazyops if x.op is BufferOps.LOAD][0]
-    st: ShapeTracker = ldb.arg.st.simplify()
+    ldb = [x for x in si.ast.parents if x.op is Ops.LOAD][0]
+    st: ShapeTracker = ldb.st_arg.simplify()
     # NOTE: st.real_size() is broken
     print(si.inputs[0].size)
     #self.assertEqual(si.inputs[0].size, st.real_size())
@@ -53,11 +51,8 @@ class TestConvShapetracker(unittest.TestCase):
       print(i, i1, i2, si.inputs[0].size, i1==i2)
       #self.assertEqual(i1, i2)
 
-    for stt in [st, test_st]:
-      s,va = stt.expr_idxs()
-      print(s)
-      print(va)
-    assert len(st.views) <= 2
+    with self.assertRaises(AssertionError):
+      assert len(st.views) <= 2
 
 if __name__ == '__main__':
   unittest.main()
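
The updated test walks the kernel AST as a UOp graph: LOAD nodes are collected from ast.parents and their ShapeTracker is read from st_arg. A hedged sketch of the same inspection on an arbitrary scheduled kernel; the matmul here is only an example workload, and view counts vary by backend and shapes:

    from tinygrad import Tensor
    from tinygrad.ops import Ops

    si = (Tensor.rand(8, 8) @ Tensor.rand(8, 8)).schedule()[-1]  # last ScheduleItem
    for ld in [u for u in si.ast.parents if u.op is Ops.LOAD]:
      print(len(ld.st_arg.views))  # how many views each load needs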

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_copy_speed.py

@@ -4,7 +4,7 @@ from tinygrad import Device
 from tinygrad.helpers import Timing, CI, OSX
 import multiprocessing.shared_memory as shared_memory
 
-N = 4096 if CI else 16384
+N = 4096
 class TestCopySpeed(unittest.TestCase):
   @classmethod
   def setUpClass(cls): Device[Device.DEFAULT].synchronize()

{tinygrad-0.9.2 → tinygrad-0.10.0}/test/test_device_speed.py

@@ -1,13 +1,12 @@
 import unittest
 from tinygrad import Device
-from tinygrad.codegen.uopgraph import UOpGraph
 from tinygrad.helpers import Timing, Profiling
 
 class TestDeviceSpeed(unittest.TestCase):
   @classmethod
   def setUpClass(cls):
     cls.dev = Device[Device.DEFAULT]
-    cls.empty = Device[Device.DEFAULT].renderer.render("test", UOpGraph([]))
+    cls.empty = Device[Device.DEFAULT].renderer.render("test", [])
 
   def test_empty_compile(self):
     with Timing("compiler "):
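
The change above reflects the renderer API in 0.10.0: render() takes a plain list of uops rather than a UOpGraph object, so an empty program is rendered from an empty list. A small sketch (the emitted source is backend-dependent):

    from tinygrad import Device

    renderer = Device[Device.DEFAULT].renderer
    print(renderer.render("test", []))  # skeleton source of an empty kernel for the default backend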