tinygrad 0.10.0.tar.gz → 0.10.1.tar.gz

This diff compares two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (167)
  1. {tinygrad-0.10.0 → tinygrad-0.10.1}/PKG-INFO +20 -9
  2. {tinygrad-0.10.0 → tinygrad-0.10.1}/README.md +5 -4
  3. {tinygrad-0.10.0 → tinygrad-0.10.1}/setup.py +7 -5
  4. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_arange.py +14 -10
  5. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_assign.py +17 -11
  6. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_const_folding.py +42 -7
  7. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_conv_shapetracker.py +4 -6
  8. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_dtype.py +38 -15
  9. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_dtype_alu.py +42 -9
  10. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_fusion_op.py +8 -9
  11. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_gc.py +24 -8
  12. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_graph.py +2 -3
  13. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_hcq.py +105 -77
  14. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_image_dtype.py +63 -8
  15. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_jit.py +101 -2
  16. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_kernel_cache.py +11 -9
  17. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_linearizer.py +133 -134
  18. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_linearizer_dumb.py +9 -7
  19. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_linearizer_failures.py +93 -113
  20. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_linearizer_overflows.py +9 -9
  21. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_metal.py +3 -5
  22. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_multitensor.py +166 -52
  23. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_net_speed.py +1 -1
  24. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_nn.py +40 -34
  25. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_ops.py +416 -73
  26. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_optim.py +1 -1
  27. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_pickle.py +59 -5
  28. tinygrad-0.10.1/test/test_profiler.py +163 -0
  29. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_randomness.py +2 -1
  30. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_renderer_failures.py +13 -6
  31. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_schedule.py +838 -173
  32. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_search.py +11 -10
  33. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_setitem.py +24 -9
  34. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_speed_v_torch.py +0 -1
  35. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_subbuffer.py +19 -1
  36. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_symbolic_ops.py +0 -2
  37. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_symbolic_shapetracker.py +12 -1
  38. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_tensor.py +133 -55
  39. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_tensor_data.py +14 -0
  40. tinygrad-0.10.0/test/test_lazybuffer.py → tinygrad-0.10.1/test/test_tensor_uop.py +32 -49
  41. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_tiny.py +42 -8
  42. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_transcendental.py +4 -4
  43. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_uop_graph.py +66 -64
  44. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_uops.py +225 -43
  45. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_uops_stats.py +32 -18
  46. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_winograd.py +11 -4
  47. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_zero_copy.py +1 -1
  48. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/codegen/kernel.py +114 -172
  49. tinygrad-0.10.1/tinygrad/codegen/linearize.py +225 -0
  50. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/codegen/lowerer.py +30 -35
  51. tinygrad-0.10.0/tinygrad/codegen/uopgraph.py → tinygrad-0.10.1/tinygrad/codegen/rewriter.py +69 -59
  52. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/codegen/transcendental.py +12 -13
  53. tinygrad-0.10.1/tinygrad/device.py +344 -0
  54. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/dtype.py +28 -26
  55. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/engine/jit.py +80 -63
  56. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/engine/memory.py +4 -5
  57. tinygrad-0.10.1/tinygrad/engine/multi.py +162 -0
  58. tinygrad-0.10.1/tinygrad/engine/realize.py +168 -0
  59. tinygrad-0.10.1/tinygrad/engine/schedule.py +486 -0
  60. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/engine/search.py +40 -44
  61. tinygrad-0.10.1/tinygrad/gradient.py +70 -0
  62. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/helpers.py +77 -58
  63. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/nn/__init__.py +30 -32
  64. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/nn/datasets.py +1 -2
  65. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/nn/optim.py +22 -26
  66. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/nn/state.py +89 -64
  67. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/ops.py +562 -446
  68. tinygrad-0.10.1/tinygrad/renderer/__init__.py +132 -0
  69. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/renderer/cstyle.py +70 -84
  70. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/renderer/llvmir.py +32 -20
  71. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/renderer/ptx.py +79 -99
  72. tinygrad-0.10.1/tinygrad/renderer/wgsl.py +87 -0
  73. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/autogen/amd_gpu.py +39507 -12
  74. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/autogen/comgr.py +2 -0
  75. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/autogen/kfd.py +4 -3
  76. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/autogen/kgsl.py +1 -1
  77. tinygrad-0.10.1/tinygrad/runtime/autogen/libpciaccess.py +2023 -0
  78. tinygrad-0.10.1/tinygrad/runtime/autogen/llvm.py +11379 -0
  79. tinygrad-0.10.1/tinygrad/runtime/autogen/vfio.py +891 -0
  80. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/graph/cuda.py +8 -9
  81. tinygrad-0.10.1/tinygrad/runtime/graph/hcq.py +205 -0
  82. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/graph/metal.py +19 -21
  83. tinygrad-0.10.1/tinygrad/runtime/ops_amd.py +632 -0
  84. tinygrad-0.10.1/tinygrad/runtime/ops_clang.py +22 -0
  85. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/ops_cloud.py +34 -34
  86. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/ops_cuda.py +30 -27
  87. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/ops_disk.py +62 -63
  88. tinygrad-0.10.1/tinygrad/runtime/ops_dsp.py +272 -0
  89. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/ops_gpu.py +30 -30
  90. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/ops_hip.py +29 -31
  91. tinygrad-0.10.1/tinygrad/runtime/ops_llvm.py +56 -0
  92. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/ops_metal.py +93 -73
  93. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/ops_npy.py +2 -2
  94. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/ops_nv.py +232 -270
  95. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/ops_python.py +51 -46
  96. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/ops_qcom.py +129 -157
  97. tinygrad-0.10.1/tinygrad/runtime/ops_webgpu.py +63 -0
  98. tinygrad-0.10.1/tinygrad/runtime/support/allocator.py +94 -0
  99. tinygrad-0.10.1/tinygrad/runtime/support/am/amdev.py +384 -0
  100. tinygrad-0.10.1/tinygrad/runtime/support/am/ip.py +463 -0
  101. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/support/compiler_cuda.py +4 -2
  102. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/support/elf.py +26 -4
  103. tinygrad-0.10.1/tinygrad/runtime/support/hcq.py +469 -0
  104. tinygrad-0.10.1/tinygrad/runtime/support/llvm.py +32 -0
  105. tinygrad-0.10.1/tinygrad/shape/__init__.py +0 -0
  106. tinygrad-0.10.1/tinygrad/shape/shapetracker.py +142 -0
  107. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/shape/view.py +103 -138
  108. tinygrad-0.10.1/tinygrad/spec.py +154 -0
  109. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/tensor.py +744 -496
  110. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad.egg-info/PKG-INFO +20 -9
  111. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad.egg-info/SOURCES.txt +15 -7
  112. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad.egg-info/requires.txt +6 -4
  113. tinygrad-0.10.0/test/test_profiler.py +0 -221
  114. tinygrad-0.10.0/test/test_viz.py +0 -93
  115. tinygrad-0.10.0/tinygrad/codegen/linearize.py +0 -95
  116. tinygrad-0.10.0/tinygrad/device.py +0 -221
  117. tinygrad-0.10.0/tinygrad/engine/lazy.py +0 -228
  118. tinygrad-0.10.0/tinygrad/engine/realize.py +0 -217
  119. tinygrad-0.10.0/tinygrad/engine/schedule.py +0 -419
  120. tinygrad-0.10.0/tinygrad/function.py +0 -212
  121. tinygrad-0.10.0/tinygrad/multi.py +0 -177
  122. tinygrad-0.10.0/tinygrad/renderer/__init__.py +0 -89
  123. tinygrad-0.10.0/tinygrad/runtime/graph/clang.py +0 -39
  124. tinygrad-0.10.0/tinygrad/runtime/graph/hcq.py +0 -200
  125. tinygrad-0.10.0/tinygrad/runtime/ops_amd.py +0 -471
  126. tinygrad-0.10.0/tinygrad/runtime/ops_clang.py +0 -35
  127. tinygrad-0.10.0/tinygrad/runtime/ops_dsp.py +0 -181
  128. tinygrad-0.10.0/tinygrad/runtime/ops_llvm.py +0 -51
  129. tinygrad-0.10.0/tinygrad/runtime/support/hcq.py +0 -539
  130. tinygrad-0.10.0/tinygrad/shape/shapetracker.py +0 -111
  131. {tinygrad-0.10.0 → tinygrad-0.10.1}/LICENSE +0 -0
  132. {tinygrad-0.10.0 → tinygrad-0.10.1}/setup.cfg +0 -0
  133. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_compile_failures.py +0 -0
  134. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_conv.py +0 -0
  135. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_copy_speed.py +0 -0
  136. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_device_speed.py +0 -0
  137. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_fuzz_shape_ops.py +0 -0
  138. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_masked_st.py +0 -0
  139. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_method_cache.py +0 -0
  140. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_ocl.py +0 -0
  141. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_rearrange_einops.py +0 -0
  142. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_sample.py +0 -0
  143. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_specific_conv.py +0 -0
  144. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_symbolic_jit.py +0 -0
  145. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_tensor_variable.py +0 -0
  146. {tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_to_numpy.py +0 -0
  147. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/__init__.py +0 -0
  148. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/codegen/__init__.py +0 -0
  149. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/engine/__init__.py +0 -0
  150. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/py.typed +0 -0
  151. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/__init__.py +0 -0
  152. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/autogen/adreno.py +0 -0
  153. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/autogen/cuda.py +0 -0
  154. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/autogen/hip.py +0 -0
  155. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/autogen/hsa.py +0 -0
  156. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/autogen/io_uring.py +0 -0
  157. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/autogen/libc.py +0 -0
  158. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/autogen/nv_gpu.py +0 -0
  159. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/autogen/nvrtc.py +0 -0
  160. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/autogen/opencl.py +0 -0
  161. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/autogen/qcom_dsp.py +0 -0
  162. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/graph/__init__.py +0 -0
  163. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/support/__init__.py +0 -0
  164. {tinygrad-0.10.0/tinygrad/shape → tinygrad-0.10.1/tinygrad/runtime/support/am}/__init__.py +0 -0
  165. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad/runtime/support/compiler_hip.py +0 -0
  166. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad.egg-info/dependency_links.txt +0 -0
  167. {tinygrad-0.10.0 → tinygrad-0.10.1}/tinygrad.egg-info/top_level.txt +0 -0
{tinygrad-0.10.0 → tinygrad-0.10.1}/PKG-INFO
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.2
  Name: tinygrad
- Version: 0.10.0
+ Version: 0.10.1
  Summary: You like pytorch? You like micrograd? You love tinygrad! <3
  Author: George Hotz
  License: MIT
@@ -9,15 +9,13 @@ Classifier: License :: OSI Approved :: MIT License
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Provides-Extra: llvm
- Requires-Dist: llvmlite; extra == "llvm"
  Provides-Extra: arm
  Requires-Dist: unicorn; extra == "arm"
  Provides-Extra: triton
  Requires-Dist: triton-nightly>=2.1.0.dev20231014192330; extra == "triton"
  Provides-Extra: linting
  Requires-Dist: pylint; extra == "linting"
- Requires-Dist: mypy==1.11.2; extra == "linting"
+ Requires-Dist: mypy==1.13.0; extra == "linting"
  Requires-Dist: typing-extensions; extra == "linting"
  Requires-Dist: pre-commit; extra == "linting"
  Requires-Dist: ruff; extra == "linting"
@@ -25,6 +23,7 @@ Requires-Dist: types-tqdm; extra == "linting"
  Provides-Extra: testing
  Requires-Dist: numpy; extra == "testing"
  Requires-Dist: torch; extra == "testing"
+ Requires-Dist: jax; extra == "testing"
  Requires-Dist: pillow; extra == "testing"
  Requires-Dist: pytest; extra == "testing"
  Requires-Dist: pytest-xdist; extra == "testing"
@@ -44,6 +43,9 @@ Requires-Dist: hypothesis; extra == "testing"
  Requires-Dist: nibabel; extra == "testing"
  Requires-Dist: bottle; extra == "testing"
  Requires-Dist: ggml-python; extra == "testing"
+ Requires-Dist: capstone; extra == "testing"
+ Provides-Extra: webgpu
+ Requires-Dist: wgpu; extra == "webgpu"
  Provides-Extra: docs
  Requires-Dist: mkdocs; extra == "docs"
  Requires-Dist: mkdocs-material; extra == "docs"
@@ -55,6 +57,14 @@ Requires-Dist: numpy; extra == "docs"
  Provides-Extra: testing-tf
  Requires-Dist: tensorflow==2.15.1; extra == "testing-tf"
  Requires-Dist: tensorflow_addons; extra == "testing-tf"
+ Dynamic: author
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: license
+ Dynamic: provides-extra
+ Dynamic: requires-python
+ Dynamic: summary

  <div align="center">

@@ -146,6 +156,7 @@ tinygrad already supports numerous accelerators, including:
  - [x] [AMD](tinygrad/runtime/ops_amd.py)
  - [x] [NV](tinygrad/runtime/ops_nv.py)
  - [x] [QCOM](tinygrad/runtime/ops_qcom.py)
+ - [x] [WEBGPU](tinygrad/runtime/ops_webgpu.py)

  And it is easy to add more! Your accelerator of choice only needs to support a total of ~25 low level ops.

@@ -183,8 +194,8 @@ y = Tensor([[2.0,0,-2.0]], requires_grad=True)
  z = y.matmul(x).sum()
  z.backward()

- print(x.grad.numpy()) # dz/dx
- print(y.grad.numpy()) # dz/dy
+ print(x.grad.tolist()) # dz/dx
+ print(y.grad.tolist()) # dz/dy
  ```

  The same thing but in PyTorch:
@@ -196,8 +207,8 @@ y = torch.tensor([[2.0,0,-2.0]], requires_grad=True)
  z = y.matmul(x).sum()
  z.backward()

- print(x.grad.numpy()) # dz/dx
- print(y.grad.numpy()) # dz/dy
+ print(x.grad.tolist()) # dz/dx
+ print(y.grad.tolist()) # dz/dy
  ```

  ## Contributing
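The PKG-INFO hunks above add a `webgpu` extra (Provides-Extra: webgpu / Requires-Dist: wgpu) and a WEBGPU entry in the accelerator list. A minimal sketch of exercising that backend, assuming the optional `wgpu` package is installed and WebGPU is actually usable on the machine; the explicit `device=` argument is just one way to pick a backend:

    from tinygrad import Tensor

    # assumes the optional wgpu package (the new 'webgpu' extra) is installed
    t = Tensor([1.0, 2.0, 3.0], device="WEBGPU")
    print((t + 1).tolist())  # runs the add on the WebGPU backend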
{tinygrad-0.10.0 → tinygrad-0.10.1}/README.md
@@ -88,6 +88,7 @@ tinygrad already supports numerous accelerators, including:
  - [x] [AMD](tinygrad/runtime/ops_amd.py)
  - [x] [NV](tinygrad/runtime/ops_nv.py)
  - [x] [QCOM](tinygrad/runtime/ops_qcom.py)
+ - [x] [WEBGPU](tinygrad/runtime/ops_webgpu.py)

  And it is easy to add more! Your accelerator of choice only needs to support a total of ~25 low level ops.

@@ -125,8 +126,8 @@ y = Tensor([[2.0,0,-2.0]], requires_grad=True)
  z = y.matmul(x).sum()
  z.backward()

- print(x.grad.numpy()) # dz/dx
- print(y.grad.numpy()) # dz/dy
+ print(x.grad.tolist()) # dz/dx
+ print(y.grad.tolist()) # dz/dy
  ```

  The same thing but in PyTorch:
@@ -138,8 +139,8 @@ y = torch.tensor([[2.0,0,-2.0]], requires_grad=True)
  z = y.matmul(x).sum()
  z.backward()

- print(x.grad.numpy()) # dz/dx
- print(y.grad.numpy()) # dz/dy
+ print(x.grad.tolist()) # dz/dx
+ print(y.grad.tolist()) # dz/dy
  ```

  ## Contributing
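For reference, the updated tinygrad autograd example from the README, assembled into one runnable snippet; the `x = Tensor.eye(3, requires_grad=True)` line is not visible in these hunks and is taken from the README's full example:

    from tinygrad import Tensor

    x = Tensor.eye(3, requires_grad=True)
    y = Tensor([[2.0, 0, -2.0]], requires_grad=True)
    z = y.matmul(x).sum()
    z.backward()

    print(x.grad.tolist())  # dz/dx
    print(y.grad.tolist())  # dz/dy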
{tinygrad-0.10.0 → tinygrad-0.10.1}/setup.py
@@ -8,14 +8,14 @@ with open(directory / 'README.md', encoding='utf-8') as f:
  long_description = f.read()

  setup(name='tinygrad',
- version='0.10.0',
+ version='0.10.1',
  description='You like pytorch? You like micrograd? You love tinygrad! <3',
  author='George Hotz',
  license='MIT',
  long_description=long_description,
  long_description_content_type='text/markdown',
  packages = ['tinygrad', 'tinygrad.runtime.autogen', 'tinygrad.codegen', 'tinygrad.nn', 'tinygrad.renderer', 'tinygrad.engine',
- 'tinygrad.runtime', 'tinygrad.runtime.support', 'tinygrad.runtime.graph', 'tinygrad.shape'],
+ 'tinygrad.runtime', 'tinygrad.runtime.support', 'tinygrad.runtime.support.am', 'tinygrad.runtime.graph', 'tinygrad.shape'],
  package_data = {'tinygrad': ['py.typed']},
  classifiers=[
  "Programming Language :: Python :: 3",
@@ -24,12 +24,11 @@ setup(name='tinygrad',
  install_requires=[],
  python_requires='>=3.10',
  extras_require={
- 'llvm': ["llvmlite"],
  'arm': ["unicorn"],
  'triton': ["triton-nightly>=2.1.0.dev20231014192330"],
  'linting': [
  "pylint",
- "mypy==1.11.2",
+ "mypy==1.13.0",
  "typing-extensions",
  "pre-commit",
  "ruff",
@@ -39,6 +38,7 @@ setup(name='tinygrad',
  'testing': [
  "numpy",
  "torch",
+ "jax",
  "pillow",
  "pytest",
  "pytest-xdist",
@@ -57,8 +57,10 @@ setup(name='tinygrad',
  "hypothesis",
  "nibabel",
  "bottle",
- "ggml-python"
+ "ggml-python",
+ "capstone"
  ],
+ 'webgpu': ["wgpu"],
  'docs': [
  "mkdocs",
  "mkdocs-material",
{tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_arange.py
@@ -1,11 +1,12 @@
  import unittest, contextlib
  import numpy as np
- from tinygrad import Tensor, GlobalCounters, dtypes, nn
+ from tinygrad import Tensor, GlobalCounters, dtypes, nn, Device
  from tinygrad.helpers import CI, Context, getenv
  from tinygrad.engine.realize import run_schedule
  from tinygrad.codegen.kernel import Opt, OptOps, Kernel, KernelOptError
  from tinygrad.engine.realize import CompiledRunner, ExecItem
  from tinygrad.engine.search import get_kernel_actions
+ from tinygrad.ops import Ops

  class TestArange(unittest.TestCase):
  def _get_flops(self, N, opts=None):
@@ -21,7 +22,7 @@ class TestArange(unittest.TestCase):
  #print(p.src)
  ExecItem(CompiledRunner(p), [tt.lazydata.buffer]).run()
  np.testing.assert_equal(tt.numpy(), np.arange(N))
- return p.op_estimate
+ return p.estimates.ops

  def test_complexity(self, opts=None, limit=None):
  # add 1 to avoid divide by 0. arange is 0 flops now!
@@ -40,7 +41,7 @@ class TestArange(unittest.TestCase):
  def test_complexity_w_upcast_and_unroll(self): return self.test_complexity([Opt(OptOps.UPCAST, 0, 4), Opt(OptOps.UNROLL, 0, 4)], limit=1)

  @unittest.skip("doesn't work yet")
- def test_complexity_w_local_and_padto(self): return self.test_complexity([Opt(OptOps.LOCAL, 0, 16), Opt(op=OptOps.PADTO, axis=1, amt=32)])
+ def test_complexity_w_local_and_padto(self): return self.test_complexity([Opt(OptOps.LOCAL, 0, 16), Opt(op=OptOps.PADTO, axis=1, arg=32)])

  def test_all_opts(self, opts=None, exclude=None):
  k = Kernel(Tensor.arange(256).schedule()[-1].ast)
@@ -58,13 +59,15 @@ class TestArange(unittest.TestCase):
  self.test_complexity(opts)
  def test_all_opts_w_local(self):
  with contextlib.suppress(KernelOptError):
- return self.test_all_opts([Opt(OptOps.LOCAL, 0, 16)], [Opt(op=OptOps.PADTO, axis=1, amt=32)])
+ return self.test_all_opts([Opt(OptOps.LOCAL, 0, 16)], [Opt(op=OptOps.PADTO, axis=1, arg=32)])
  def test_all_opts_w_upcast(self): return self.test_all_opts([Opt(OptOps.UPCAST, 0, 4)])
- def test_all_opts_w_unroll(self): return self.test_all_opts([Opt(OptOps.UNROLL, 0, 4)], [Opt(op=OptOps.GROUP, axis=0, amt=0)])
+ def test_all_opts_w_unroll(self): return self.test_all_opts([Opt(OptOps.UNROLL, 0, 4)], [Opt(op=OptOps.GROUP, axis=0, arg=0)])
  def test_all_opts_w_upcast_and_unroll(self):
- return self.test_all_opts([Opt(OptOps.UPCAST, 0, 4), Opt(OptOps.UNROLL, 0, 4)], [Opt(op=OptOps.GROUP, axis=0, amt=0)])
+ return self.test_all_opts([Opt(OptOps.UPCAST, 0, 4), Opt(OptOps.UNROLL, 0, 4)], [Opt(op=OptOps.GROUP, axis=0, arg=0)])

  class TestIndexing(unittest.TestCase):
+ # update: passing after CAST_BEFORE_VIEW=1 deletion
+ # @unittest.expectedFailure
  def test_arange_2_reduce(self):
  needle = Tensor.zeros(16384, dtype=dtypes.int).contiguous()
  needle[1337] = 1
@@ -86,7 +89,7 @@ class TestIndexing(unittest.TestCase):
  print("*** indexing ***")
  with Context(NOOPT=1, FUSE_ARANGE=1):
  GlobalCounters.reset()
- rng = Tensor.ones(4, 256, 16384, dtype=dtypes.int)._cumsum(axis=-1, _first_zero=True).reshape(4, 256, 16384, 1)
+ rng = Tensor.ones(4, 256, 16384, dtype=dtypes.int)._cumalu(axis=-1, op=Ops.ADD, _include_initial=True).reshape(4, 256, 16384, 1)
  idxs = idxs.reshape(4,1,1,1).expand(4, 256, 16384, 1)
  reshape_dataset = dataset.T.reshape(1, 256, 16384, 1).expand(4, 256, 16384, 1)
  full = (rng==idxs).where(reshape_dataset, Tensor.zeros(4, 256, 16384, 1))
@@ -138,7 +141,7 @@ class TestIndexing(unittest.TestCase):
  np.testing.assert_equal(X.numpy(), 0)

  @unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
- def test_index_mnist(self, noopt=1, op_limit=512*784*5):
+ def test_index_mnist(self, noopt=1, op_limit=512*784*13):
  from tinygrad.nn.datasets import mnist
  X_train, Y_train, _, _ = mnist()
  with Context(NOOPT=noopt, FUSE_ARANGE=1, SPLIT_REDUCEOP=0):
@@ -152,12 +155,13 @@ class TestIndexing(unittest.TestCase):
  @unittest.skip("not ready")
  def test_index_mnist_opt(self): self.test_index_mnist(0)

- @unittest.skipIf(getenv("PTX"), "broken on ptx for some reason")
+ @unittest.skipIf(getenv("PTX") or Device.DEFAULT == "WEBGPU", "broken on ptx and WebGPU for some reason")
  def test_llama_embedding(self, noopt=1, op_limit=65536):
  # llama3 is 128256
  vocab_size, embed_size = (10, 3) if CI else (32000, 4096)
  emb = nn.Embedding(vocab_size, embed_size)
- emb_w = emb.weight.numpy()
+ # TODO: why is a new realize needed here
+ emb_w = emb.weight.realize().numpy()
  x = Tensor([1,2,3,4])
  with Context(NOOPT=noopt, FUSE_ARANGE=1):
  GlobalCounters.reset()
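The test_arange.py hunks above track two renames in 0.10.1: the `Opt` keyword `amt` becomes `arg`, and a program's op count moves from `p.op_estimate` to `p.estimates.ops`. A minimal sketch of the new spellings, reusing the kernel construction from `test_all_opts`; `apply_opt` and `to_program` come from the surrounding test helpers rather than these hunks:

    from tinygrad import Tensor
    from tinygrad.codegen.kernel import Kernel, Opt, OptOps

    # build a kernel the same way test_all_opts does
    k = Kernel(Tensor.arange(256).schedule()[-1].ast)
    k.apply_opt(Opt(op=OptOps.UPCAST, axis=0, arg=4))  # 0.10.0 spelled this keyword `amt`
    p = k.to_program()
    print(p.estimates.ops)  # 0.10.0 exposed this as p.op_estimate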
{tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_assign.py
@@ -2,7 +2,8 @@
  import unittest
  import numpy as np
  from tinygrad import dtypes, Tensor, TinyJit, GlobalCounters, Variable
- from tinygrad.engine.schedule import create_schedule
+ from tinygrad.device import is_dtype_supported
+ from tinygrad.helpers import temp

  N = 200 # has to be bigger than the cache to fail

@@ -168,16 +169,6 @@ class TestAssign(unittest.TestCase):
  a += 1
  np.testing.assert_allclose(a.numpy(), 3)

- # NOTE: this is similar to the resnet failure
- #@unittest.expectedFailure
- def test_double_assign_alt(self):
- a = Tensor.ones(4).contiguous().realize()
- b = Tensor([1, 2, 3, 4]).realize().lazydata
- a1 = a.lazydata.assign(b)
- a2 = a.lazydata.assign(b)
- sched = create_schedule([a1, a2])
- self.assertEqual(len(sched), 1)
-
  def test_crossover_assign(self):
  a = Tensor.full((4,), 2).contiguous().realize()
  b = Tensor.full((4,), 3).contiguous().realize()
@@ -293,6 +284,7 @@ class TestAssign(unittest.TestCase):
  #assert ba1 == ba2 and ba1 != bb1
  np.testing.assert_allclose(a.numpy(), np.arange(N*N).reshape((N,N)) + np.arange(N*N).reshape((N,N)).transpose(1,0))

+ @unittest.skip("multi output not supported anymore")
  def test_simple_assignment_multioutput(self):
  a = Tensor.randn(32, 32).realize()
  b = Tensor.full((32, ), 1.).contiguous().realize()
@@ -331,6 +323,7 @@ class TestAssign(unittest.TestCase):
  b.assign(r + b.permute(1, 0))
  b.realize()

+ @unittest.skip("multi output not supported anymore")
  def test_permuted_reduceop_multioutput_dual_use(self):
  a = Tensor.randn(32, 32, 32).realize()
  b = Tensor.full((32, 32), 1.).contiguous().realize()
@@ -343,6 +336,7 @@ class TestAssign(unittest.TestCase):
  c.assign(r + b_perm)
  Tensor.realize(b, c)

+ @unittest.skip("multi output not supported anymore")
  def test_permuted_reduceop_multioutput_dual_use_possible(self):
  a = Tensor.randn(32, 32, 32, dtype=dtypes.int).realize()
  b = Tensor.arange(32 * 32).reshape(32, 32).realize()
@@ -376,6 +370,14 @@ class TestAssign(unittest.TestCase):

  # TODO: is there a way to sneak in a permute such that it returns the wrong answer?

+ @unittest.skipUnless(is_dtype_supported(dtypes.half), "need half")
+ def test_setitem_half(self):
+ a = Tensor.full((8,), 1.0, dtype=dtypes.half).contiguous().realize()
+ b = Tensor.full((4,), 2.0, dtype=dtypes.half).contiguous().realize()
+ assign = a[:4].assign(b)
+ assign.realize()
+ np.testing.assert_allclose(a.numpy(), [2., 2., 2., 2., 1., 1., 1., 1.])
+
  @unittest.skip("don't use output buffer, and mismatch dtype no longer supported")
  def test_cast_assignment(self):
  a = Tensor(np.arange(N*N, dtype=np.float32)).reshape(N,N)
@@ -387,5 +389,9 @@ class TestAssign(unittest.TestCase):
  assert oba1 is None and oba2 is None
  np.testing.assert_allclose(a.numpy(), np.arange(N*N,dtype=np.int32).reshape((N,N)))

+ def test_disk_assignment(self):
+ a = Tensor.empty(5, device=f"disk:{temp('disk_assignment')}").assign(Tensor.ones(5)).numpy()
+ np.testing.assert_equal(a, np.ones(5))
+
  if __name__ == "__main__":
  unittest.main()
{tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_const_folding.py
@@ -1,16 +1,15 @@
  import unittest, math
  from tinygrad import Tensor, Device, dtypes
  from tinygrad.ops import Ops
- from tinygrad.engine.schedule import create_schedule
  from tinygrad.helpers import CI
  import numpy as np
  from tinygrad.device import is_dtype_supported

  def _check_ast_count(desired_count:int, t:Tensor):
  # NOTE: this has side effect because everything can be scheduled only once
- schedule = create_schedule(t.lazydata.lbs)
+ schedule = t.schedule()
  asts = [s for s in schedule if s.ast.op is Ops.SINK]
- assert len(asts) == desired_count
+ assert len(asts) == desired_count, f"{len(asts)} != {desired_count}"

  class TestUnaryOpsConstFolding(unittest.TestCase):
  def test_all_consts_ops(self):
@@ -95,15 +94,17 @@ class TestBinaryOpsConstFolding(unittest.TestCase):
  _check_ast_count(0, Tensor([1.0, 2, 3, 4]) ** Tensor.ones(4))
  def test_literal_one_pow(self):
  _check_ast_count(0, 1 ** Tensor([1.0, 2, 3, 4]))
+ # TODO: pow simplification
  def test_tensor_one_pow(self):
- _check_ast_count(0, Tensor.ones(4) ** Tensor([1.0, 2, 3, 4]))
+ _check_ast_count(1, Tensor.ones(4) ** Tensor([1.0, 2, 3, 4]))

  # folds advance indexing into basic indexing
  class TestIndexingConstFolding(unittest.TestCase):
  def test_scalar_index(self):
  t = Tensor.arange(16).float().reshape(1,1,4,4).realize()
  _check_ast_count(0, t[:,:,Tensor(1),:])
- _check_ast_count(0, t[:,:,Tensor(1)+2,:])
+ # NOTE: this is no longer supported because the 1+2 isn't folding early.
+ #_check_ast_count(0, t[:,:,Tensor(1)+2,:])
  _check_ast_count(0, t[:,:,Tensor(1),Tensor(0)])

  @unittest.expectedFailure
@@ -130,11 +131,12 @@ class TestMovedConstFolding(unittest.TestCase):

  def test_cast_padded(self):
  # NOTE: this is folded due to CAST_BEFORE_VIEW
+ # update: CAST_BEFORE_VIEW=1 is no longer supported
  if is_dtype_supported(dtypes.int16):
- _check_ast_count(0, Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int16))
+ _check_ast_count(1, Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int16))
  np.testing.assert_equal(Tensor.ones(4).pad(((1, 1),)).cast(dtypes.int16).numpy(), [0, 1, 1, 1, 1, 0])
  if is_dtype_supported(dtypes.uint16):
- _check_ast_count(0, Tensor.full(4, fill_value=-1).pad(((1, 1),)).cast(dtypes.uint16))
+ _check_ast_count(1, Tensor.full(4, fill_value=-1).pad(((1, 1),)).cast(dtypes.uint16))
  np.testing.assert_equal(Tensor.full(4, fill_value=-1).pad(((1, 1),)).cast(dtypes.uint16).numpy(), [0, 65535, 65535, 65535, 65535, 0])
  # not folded
  if is_dtype_supported(dtypes.int64):
@@ -158,6 +160,37 @@ class TestReduceOpsConstFolding(unittest.TestCase):
  _check_ast_count(1, Tensor.ones(4).pad(((1, 1),)).exp().sum())
  np.testing.assert_allclose(Tensor.ones(4).pad(((1, 1),)).exp().sum().numpy(), 4 * math.e + 2)

+ def test_bool_zero_max(self):
+ _check_ast_count(0, Tensor.full((1, 2), True).shrink(((0, 1), (0, 0))).max((1, 0)))
+ np.testing.assert_equal(Tensor.full((1, 2), True).shrink(((0, 1), (0, 0))).max((1, 0)).numpy(), False)
+
+ def test_zero_size_ops(self):
+ for reduceop in [lambda x:x.prod(), lambda x:x.sum()]: # lambda x:x.max() NOTE: numpy gives "reduction operation maximum which has no identity"
+ _check_ast_count(0, reduceop(Tensor.empty(1, 0)))
+ np.testing.assert_equal(reduceop(Tensor.empty(shape:=(1, 0))).numpy(), reduceop(np.empty(shape)))
+
+ def test_zero_size_ops_view(self):
+ for reduceop in [lambda x:x.prod(), lambda x:x.sum()]:
+ _check_ast_count(0, reduceop(Tensor.empty(1, 0, 4).permute((1, 2, 0)).contiguous()))
+ np.testing.assert_equal(reduceop(Tensor.empty(shape:=(1, 0))).numpy(), reduceop(np.empty((shape))))
+
+ def test_zero_size_ops_realized(self):
+ for reduceop in [lambda x:x.prod(), lambda x:x.sum()]:
+ _check_ast_count(0, reduceop((Tensor.randn(0, 1)+1).realize()))
+ np.testing.assert_equal(reduceop((Tensor.randn(shape:=(0, 1))+1).realize()).numpy(), reduceop(np.empty(shape)))
+
+ def test_zero_size_realize_folded(self):
+ # non contiguous folded output doesn't realize
+ _check_ast_count(0, Tensor.empty(1, 0).sum())
+ # contiguous folded const can still schedule
+ a = Tensor.empty(1, 0).sum().contiguous()
+ _check_ast_count(2, a+2)
+ self.assertIsNotNone(a.lazydata.base.realized)
+ np.testing.assert_equal((Tensor.empty(1, 0).sum().contiguous()+2).numpy(), 2)
+ # otherwise we just fuse it
+ _check_ast_count(1, (Tensor.empty(1, 0).sum()+2).contiguous())
+ np.testing.assert_equal((Tensor.empty(1, 0).sum()+2).numpy(), 2)
+
  def test_const_prod(self):
  _check_ast_count(0, Tensor.full((2, 3), fill_value=2).prod())
  np.testing.assert_equal(Tensor.full((2, 3), fill_value=2).prod().numpy(), 2**(2*3))
@@ -206,6 +239,8 @@ class TestMultiConstFolding(unittest.TestCase):
  _check_ast_count(0, t ** 1)
  _check_ast_count(0, 1 ** t)

+ # failing because multi calls .contiguous() on every single sharded uop
+ @unittest.expectedFailure
  def test_multi_const_folding_tensor(self):
  ds = tuple(f"{Device.DEFAULT}:{i}" for i in range(4))
  t = Tensor.arange(16).float().realize().to(ds)
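The `create_schedule` import removed above is replaced by calling `Tensor.schedule()` directly, as the updated `_check_ast_count` helper shows. A minimal sketch of counting scheduled kernels in the new style, using the same const-folded expression as `test_const_prod`:

    from tinygrad import Tensor
    from tinygrad.ops import Ops

    t = Tensor.full((2, 3), fill_value=2).prod()
    # count only real compute kernels, like the updated _check_ast_count helper
    kernels = [s for s in t.schedule() if s.ast.op is Ops.SINK]
    print(len(kernels))  # const folding leaves nothing to run, so this is 0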
{tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_conv_shapetracker.py
@@ -3,7 +3,6 @@ import unittest
  from tinygrad.ops import Ops
  from tinygrad.tensor import Tensor
  from tinygrad.nn import Conv2d
- from tinygrad.engine.schedule import create_schedule
  from tinygrad.shape.shapetracker import ShapeTracker, View
  from tinygrad.helpers import prod
  from test.unit.test_shapetracker import shapetracker_getitem
@@ -11,13 +10,12 @@ from test.unit.test_shapetracker import shapetracker_getitem
  class TestConvShapetracker(unittest.TestCase):
  def test_conv_3x3_one_view(self):
  conv = Conv2d(16, 32, (3, 3))
-
  # first run to init the weights, they are scheduled.
- create_schedule([conv(Tensor.empty(1, 16, 10, 10)).lazydata])
+ conv(Tensor.empty(1, 16, 10, 10)).schedule()
  # run it again to get the kernels
- sched = [si for si in create_schedule([conv(Tensor.empty(1, 16, 10, 10)).lazydata]) if si.ast.op is Ops.SINK]
+ sched = [si for si in conv(Tensor.empty(1, 16, 10, 10)).schedule() if si.ast.op is Ops.SINK]
  assert len(sched) == 1, f"conv should only have one kernel, getting {len(sched)}"
- for st in [x.st_arg for x in sched[0].ast.parents if x.op is Ops.LOAD]:
+ for st in [x.st_arg for x in sched[0].ast.toposort if x.op is Ops.LOAD]:
  assert len(st.views) == 1

  def test_conv_2x2_backward_one_view(self):
@@ -26,7 +24,7 @@ class TestConvShapetracker(unittest.TestCase):
  conv(X).mean().backward()
  si = X.grad.schedule()[-1]
  print(si)
- ldb = [x for x in si.ast.parents if x.op is Ops.LOAD][0]
+ ldb = [x for x in si.ast.toposort if x.op is Ops.LOAD][0]
  st: ShapeTracker = ldb.st_arg.simplify()
  # NOTE: st.real_size() is broken
  print(si.inputs[0].size)
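The `.parents` to `.toposort` rename above is how 0.10.1 walks a scheduled AST's UOps. A minimal sketch of collecting the LOAD ops from a schedule item in the new style; the toy tensor expression is only an illustration, not part of this test:

    from tinygrad import Tensor
    from tinygrad.ops import Ops

    si = (Tensor.empty(4, 4) + 1).schedule()[-1]
    # toposort yields the AST's UOps in topological order (a property in 0.10.1)
    loads = [u for u in si.ast.toposort if u.op is Ops.LOAD]
    print(len(loads))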
{tinygrad-0.10.0 → tinygrad-0.10.1}/test/test_dtype.py
@@ -4,10 +4,10 @@ import torch
  from typing import Any, List
  from tinygrad.device import is_dtype_supported
  from tinygrad.helpers import getenv, DEBUG, CI
- from tinygrad.dtype import DType, DTYPES_DICT, ImageDType, PtrDType, least_upper_float, least_upper_dtype, truncate_fp16
+ from tinygrad.dtype import DType, DTYPES_DICT, ImageDType, PtrDType, least_upper_float, least_upper_dtype, truncate_fp16, to_dtype
  from tinygrad import Device, Tensor, dtypes
  from tinygrad.tensor import _to_np_dtype
- from hypothesis import given, settings, strategies as strat
+ from hypothesis import assume, given, settings, strategies as strat
  from test.helpers import rand_for_dtype
  import pytest
  pytestmark = pytest.mark.filterwarnings("ignore")
@@ -35,11 +35,11 @@ def _test_to_np(a:Tensor, np_dtype, target):
  except AssertionError as e:
  raise AssertionError(f"\ntensor {a.numpy()} does not match target {target} with np_dtype {np_dtype}") from e

- def _assert_eq(tensor:Tensor, target_dtype:DType, target):
+ def _assert_eq(tensor:Tensor, target_dtype:DType, target, tol_target_dtype:float=1e-7):
  if DEBUG >= 2: print(tensor.numpy())
  try:
  assert tensor.dtype == target_dtype
- np.testing.assert_allclose(tensor.numpy(), target, rtol={dtypes.float16:1e-3, dtypes.bfloat16:1e-2}.get(target_dtype, 1e-7))
+ np.testing.assert_allclose(tensor.numpy(), target, rtol={dtypes.float16:1e-3, dtypes.bfloat16:1e-2}.get(target_dtype, tol_target_dtype))
  except AssertionError as e:
  raise AssertionError(f"\ntensor {tensor.numpy()} dtype {tensor.dtype} does not match target {target} with dtype {target_dtype}") from e

@@ -52,13 +52,12 @@ def _test_cast(a:Tensor, target_dtype:DType):
  if target_dtype == dtypes.half and Device.DEFAULT == "PYTHON":
  # TODO: struct.pack cannot pack value > 65504 (max of half) into e format
  a = (a > 65504).where(65504, a)
- if CI and Device.DEFAULT == "CLANG" and (target_dtype, a.dtype) in [(dtypes.double, dtypes.half), (dtypes.half, dtypes.double)]:
- # TODO: cast between double and half are broken https://github.com/tinygrad/tinygrad/issues/4084
- return

  _test_op(lambda: a.cast(target_dtype), target_dtype, list(a.numpy().astype(_to_np_dtype(target_dtype))))
  def _test_bitcast(a:Tensor, target_dtype:DType, target=None):
  if target_dtype == dtypes.bfloat16: raise unittest.SkipTest("no test for bf16 bitcast yet")
+ if getenv("PTX") and a.dtype == dtypes.int8 and target_dtype.itemsize != a.dtype.itemsize:
+ raise unittest.SkipTest("shape changing bitcast of int8 broken on PTX")
  _test_op(lambda: a.bitcast(target_dtype), target_dtype, target or a.numpy().view(_to_np_dtype(target_dtype)).tolist())

  class TestDType(unittest.TestCase):
@@ -99,7 +98,6 @@ class TestDType(unittest.TestCase):
  get_available_cast_dtypes(self.DTYPE)
  ))
  def test_bitcast(self):
- if Device.DEFAULT == "WEBGL": raise unittest.SkipTest("no bitcast in WebGL GLSL")
  if self.DTYPE == dtypes.bool: raise unittest.SkipTest("no bools in bitcast")
  list(map(
  lambda dtype:
@@ -109,6 +107,9 @@ class TestDType(unittest.TestCase):

  def test_dtypes_fields(self):
  fields = dtypes.fields()
+ self.assertIn("float", fields)
+ self.assertIn("float32", fields)
+ self.assertEqual(len(fields), 24)
  self.assertTrue(all(isinstance(value, DType) for value in fields.values()))
  self.assertTrue(all(issubclass(_to_np_dtype(value), np.generic) for value in fields.values() if _to_np_dtype(value) is not None))

@@ -117,7 +118,9 @@ class TestDType(unittest.TestCase):
  data = [1., 2., 0., 0.5, -1.5, 5.25]
  for dt in dtypes:
  arr = np.asarray(data).astype(dt)
- tin = Tensor(arr).numpy()
+ tensor = Tensor(arr)
+ if not is_dtype_supported(tensor.dtype): continue
+ tin = tensor.numpy()
  tor = torch.as_tensor(arr).detach().numpy()
  assert dt == tin.dtype == tor.dtype, f"dtype mismatch: expected={dt} | tinygrad={tin.dtype} | torch={tor.dtype}"
  np.testing.assert_allclose(tin, tor, atol=1e-6, rtol=1e-3)
@@ -244,6 +247,11 @@ class TestInt8DType(TestDType):
  def test_int8_to_uint16_negative(self):
  _test_op(lambda: Tensor([-1, -2, -3, -4], dtype=dtypes.int8).cast(dtypes.uint16), dtypes.uint16, [2**16-1, 2**16-2, 2**16-3, 2**16-4])

+ @unittest.skipIf(getenv("PTX"), "broken in ptx")
+ def test_bitcast_alt(self):
+ a = Tensor([72, -90, 27, 40, -53, 70, 96, 51], dtype=dtypes.int8).bitcast(dtypes.short)
+ self.assertListEqual(a.tolist(), [-22968, 10267, 18123, 13152])
+
  class TestUint8DType(TestDType):
  DTYPE = dtypes.uint8
  @unittest.skipIf(getenv("CUDA",0)==1 or getenv("PTX", 0)==1, "cuda saturation works differently")
@@ -254,7 +262,9 @@ class TestUint8DType(TestDType):
  class TestBitCast(unittest.TestCase):
  @given(strat.sampled_from(dtype_ints + dtype_floats), strat.sampled_from(dtype_ints + dtype_floats))
  def test_shape_change_bitcast(self, dt1, dt2):
- if dt2 == dtypes.bfloat16: raise unittest.SkipTest("no test for bf16 bitcast yet")
+ # NOTE: this has to be assume to prevent hypothesis from skipping all samples
+ assume(dt2 != dtypes.bfloat16 and dt1 != dtypes.bfloat16) # no test for bf16 bitcast yet
+ assume(not (getenv("PTX") and dt1 == dtypes.int8)) # TODO: bitcasting int8 fails in PTX
  data = rand_for_dtype(dt1, 32).reshape(2, 2, 8)
  _test_op(lambda: Tensor(data, dtype=dt1).bitcast(dt2), dt2, data.view(_to_np_dtype(dt2)).tolist())

@@ -355,7 +365,7 @@ class TestEqStrDType(unittest.TestCase):
  def test_strs(self):
  if PtrDType is None: raise unittest.SkipTest("no PtrDType support")
  self.assertEqual(str(dtypes.imagef((1,2,4))), "dtypes.imagef((1, 2, 4))")
- self.assertEqual(str(dtypes.float32.ptr()), "dtypes.float.ptr()")
+ self.assertEqual(str(dtypes.float32.ptr(16)), "dtypes.float.ptr(16)")

  class TestHelpers(unittest.TestCase):
  signed_ints = (dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64)
@@ -538,7 +548,7 @@ class TestTypeSpec(unittest.TestCase):
  _assert_eq(Tensor.arange(5, dtype=dtypes.int64), dtypes.int64, np.arange(5))
  if is_dtype_supported(dtypes.float16):
  _assert_eq(Tensor.arange(5, dtype=dtypes.float16), dtypes.float16, np.arange(5))
- _assert_eq(Tensor.arange(3, 9, 0.7), dtypes.default_float, np.arange(3, 9, 0.7))
+ _assert_eq(Tensor.arange(3, 9, 0.7), dtypes.default_float, np.arange(3, 9, 0.7), 1e-6 if Device.DEFAULT == "WEBGPU" else 1e-7)
  _assert_eq(Tensor.arange(3, 8.5, 3), dtypes.default_float, np.arange(3, 8.5, 3))
  # stop-start and step have different signs
  _assert_eq(Tensor.arange(3, 5, -2), dtypes.default_int, np.arange(3, 5, -2))
@@ -642,8 +652,7 @@ class TestAutoCastType(unittest.TestCase):
  def test_broadcast_scalar(self, dt):
  assert (Tensor.ones(4, 4, dtype=dt) + 2.3).dtype == (dt if dtypes.is_float(dt) else dtypes.default_float)
  assert (Tensor.ones(4, 4, dtype=dt) + 2).dtype == (dt if dtypes.is_float(dt) or dtypes.is_int(dt) else dtypes.default_int)
- if Device.DEFAULT != "WEBGPU" and dt != dtypes.bool:
- assert (Tensor.ones(4, 4, dtype=dt) + True).dtype == dt
+ assert (Tensor.ones(4, 4, dtype=dt) + True).dtype == dt

  def test_sum(self):
  assert (Tensor([0, 1], dtype=dtypes.bool)).sum().dtype == dtypes.int32
@@ -772,7 +781,8 @@ class TestAutoCastType(unittest.TestCase):
  if DEBUG >= 2:
  print(f"testing {default_dtype=}, {dtype=}")
  a = Tensor([1, 2, 3], dtype=dtype, requires_grad=True)
- b = (a * 5).sum()
+ # NOTE: this is broken without default_dtype because of CAST_BEFORE_VIEW
+ b = (a * 5).sum(acc_dtype=default_dtype)
  b.backward() # if there is dtype mismatch, lazy should assert
  assert a.grad.dtype == a.dtype
  np.testing.assert_allclose(a.grad.numpy(), [5, 5, 5])
@@ -851,5 +861,18 @@ class TestDtypeUsage(unittest.TestCase):
  t = Tensor([[1, 2], [3, 4]], dtype=d)
  (t*t).max().item()

+ class TestToDtype(unittest.TestCase):
+ def test_dtype_to_dtype(self):
+ dtype = dtypes.int32
+ res = to_dtype(dtype)
+ self.assertIsInstance(res, DType)
+ self.assertEqual(res, dtypes.int32)
+
+ def test_str_to_dtype(self):
+ dtype = "int32"
+ res = to_dtype(dtype)
+ self.assertIsInstance(res, DType)
+ self.assertEqual(res, dtypes.int32)
+
  if __name__ == '__main__':
  unittest.main()